From 769c78aaef312f085810add87867bf9c11bccb19 Mon Sep 17 00:00:00 2001 From: amertensreplit <49614208-amertensreplit@users.noreply.replit.com> Date: Wed, 10 Jun 2026 19:53:15 +0000 Subject: [PATCH] Add unit tests for the skill upload parser MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Task #18: Automatically test that uploaded skill files are read correctly. The skill parser (artifacts/api-server/src/lib/skillParser.ts) had no automated tests. A regression there could silently mis-read uploads. Added a new Vitest suite covering the parsing/classification logic (NOT the ZIP size/safety limits, which are tracked by a separate task). New file: artifacts/api-server/src/lib/skillParser.test.ts Coverage: - parseSingleFile: kind/language/hash/size/isBinary for .md, .sh, .py, .json, .txt, unknown ext, and a binary blob; path normalisation (dir strip, backslashes); case-insensitive SKILL.md. - parseText: wraps pasted text as markdown SKILL.md; byte-length sizing for multi-byte content. - parseZip (in-memory ZIP via fflate.zipSync): correct classification, nested path preservation, __MACOSX/.git/node_modules skipping, dir/empty entry skipping, binary-vs-text handling, stable hashing. - deriveScanName: H1 from SKILL.md, name: front-matter fallback, quote stripping, H1 preferred over front-matter, top-dir fallback, provided fallback, 120-char truncation. Verification: `pnpm --filter @workspace/api-server run test` → 59 passed (24 new). Typecheck of the new test file is clean; pre-existing typecheck errors in src/routes/scans.ts are unrelated and out of scope. 
Replit-Task-Id: 06f18e6a-2d8d-4bf2-b2ae-29675f04c059 --- .../api-server/src/lib/skillParser.test.ts | 244 ++++++++++++++++++ 1 file changed, 244 insertions(+) create mode 100644 artifacts/api-server/src/lib/skillParser.test.ts diff --git a/artifacts/api-server/src/lib/skillParser.test.ts b/artifacts/api-server/src/lib/skillParser.test.ts new file mode 100644 index 0000000..14cb028 --- /dev/null +++ b/artifacts/api-server/src/lib/skillParser.test.ts @@ -0,0 +1,244 @@ +import { describe, it, expect } from "vitest"; +import { zipSync, strToU8 } from "fflate"; +import { + parseSingleFile, + parseText, + parseZip, + deriveScanName, +} from "./skillParser"; +import { hashBytes } from "./skillFingerprint"; +import type { ParsedFile } from "./ruleCatalog"; + +function file(overrides: Partial<ParsedFile>): ParsedFile { + return { + path: "SKILL.md", + kind: "instruction", + language: "markdown", + content: "", + size: 0, + hash: "", + isBinary: false, + ...overrides, + }; +} + +describe("parseSingleFile", () => { + it("classifies a markdown file as instruction", () => { + const buf = Buffer.from("# Hello\n\nsome text", "utf-8"); + const parsed = parseSingleFile("SKILL.md", buf); + expect(parsed.path).toBe("SKILL.md"); + expect(parsed.kind).toBe("instruction"); + expect(parsed.language).toBe("markdown"); + expect(parsed.isBinary).toBe(false); + expect(parsed.content).toBe("# Hello\n\nsome text"); + expect(parsed.size).toBe(buf.length); + expect(parsed.hash).toBe(hashBytes(buf)); + }); + + it("classifies a shell script as a script with shell language", () => { + const buf = Buffer.from("#!/bin/sh\necho hi", "utf-8"); + const parsed = parseSingleFile("run.sh", buf); + expect(parsed.kind).toBe("script"); + expect(parsed.language).toBe("shell"); + expect(parsed.isBinary).toBe(false); + }); + + it("classifies a python script as a script with python language", () => { + const parsed = parseSingleFile("main.py", Buffer.from("print('hi')", "utf-8")); + expect(parsed.kind).toBe("script"); + 
expect(parsed.language).toBe("python"); + }); + + it("classifies a json file as a resource", () => { + const parsed = parseSingleFile("data.json", Buffer.from("{}", "utf-8")); + expect(parsed.kind).toBe("resource"); + expect(parsed.language).toBe("json"); + }); + + it("treats a .txt file as an instruction", () => { + const parsed = parseSingleFile("notes.txt", Buffer.from("hello", "utf-8")); + expect(parsed.kind).toBe("instruction"); + expect(parsed.language).toBe("text"); + }); + + it("leaves language null for an unknown extension", () => { + const parsed = parseSingleFile("image.xyz", Buffer.from("plain", "utf-8")); + expect(parsed.kind).toBe("resource"); + expect(parsed.language).toBeNull(); + }); + + it("detects binary content and stores no text content", () => { + const buf = Buffer.from([0x00, 0x01, 0x02, 0xff, 0xfe, 0x00]); + const parsed = parseSingleFile("blob.bin", buf); + expect(parsed.isBinary).toBe(true); + expect(parsed.kind).toBe("resource"); + expect(parsed.language).toBeNull(); + expect(parsed.content).toBe(""); + expect(parsed.size).toBe(buf.length); + expect(parsed.hash).toBe(hashBytes(buf)); + }); + + it("strips directory components from the filename", () => { + const parsed = parseSingleFile("some/dir/run.sh", Buffer.from("x", "utf-8")); + expect(parsed.path).toBe("run.sh"); + }); + + it("normalises backslash paths", () => { + const parsed = parseSingleFile("a\\b\\run.sh", Buffer.from("x", "utf-8")); + expect(parsed.path).toBe("run.sh"); + }); + + it("treats SKILL.md as instruction regardless of casing", () => { + const parsed = parseSingleFile("skill.md", Buffer.from("# Title", "utf-8")); + expect(parsed.kind).toBe("instruction"); + }); +}); + +describe("parseText", () => { + it("wraps pasted text as a markdown SKILL.md instruction", () => { + const text = "# Pasted Skill\n\nbody"; + const parsed = parseText(text); + expect(parsed.path).toBe("SKILL.md"); + expect(parsed.kind).toBe("instruction"); + expect(parsed.language).toBe("markdown"); + 
expect(parsed.isBinary).toBe(false); + expect(parsed.content).toBe(text); + expect(parsed.size).toBe(Buffer.byteLength(text, "utf-8")); + expect(parsed.hash).toBe(hashBytes(Buffer.from(text, "utf-8"))); + }); + + it("counts byte length (not character length) for multi-byte content", () => { + const text = "café"; + const parsed = parseText(text); + expect(parsed.size).toBe(Buffer.byteLength(text, "utf-8")); + expect(parsed.size).toBeGreaterThan(text.length); + }); +}); + +describe("parseZip", () => { + it("extracts text and script files with correct classification", () => { + const zip = zipSync({ + "SKILL.md": strToU8("# My Skill\n"), + "scripts/run.sh": strToU8("#!/bin/sh\necho hi\n"), + "data.json": strToU8("{\"a\":1}"), + }); + const files = parseZip(Buffer.from(zip)); + const byPath = Object.fromEntries(files.map((f) => [f.path, f])); + + expect(files).toHaveLength(3); + expect(byPath["SKILL.md"].kind).toBe("instruction"); + expect(byPath["SKILL.md"].language).toBe("markdown"); + expect(byPath["scripts/run.sh"].kind).toBe("script"); + expect(byPath["scripts/run.sh"].language).toBe("shell"); + expect(byPath["data.json"].kind).toBe("resource"); + expect(byPath["data.json"].language).toBe("json"); + }); + + it("normalises nested paths and preserves directory structure", () => { + const zip = zipSync({ + "skill/nested/deep/notes.md": strToU8("notes"), + }); + const files = parseZip(Buffer.from(zip)); + expect(files).toHaveLength(1); + expect(files[0].path).toBe("skill/nested/deep/notes.md"); + expect(files[0].kind).toBe("instruction"); + }); + + it("skips __MACOSX, .git and node_modules entries", () => { + const zip = zipSync({ + "SKILL.md": strToU8("# Skill"), + "__MACOSX/._SKILL.md": strToU8("junk"), + ".git/config": strToU8("[core]"), + "node_modules/dep/index.js": strToU8("module.exports = 1"), + }); + const files = parseZip(Buffer.from(zip)); + expect(files.map((f) => f.path)).toEqual(["SKILL.md"]); + }); + + it("ignores directory entries and empty files", () 
=> { + const zip = zipSync({ + "emptydir/": strToU8(""), + "empty.txt": strToU8(""), + "real.md": strToU8("content"), + }); + const files = parseZip(Buffer.from(zip)); + expect(files.map((f) => f.path)).toEqual(["real.md"]); + }); + + it("handles binary content inside the archive", () => { + const binary = new Uint8Array([0x00, 0x01, 0x02, 0x03, 0xff, 0x00, 0xfe]); + const zip = zipSync({ + "logo.png": binary, + "README.md": strToU8("# Readme"), + }); + const files = parseZip(Buffer.from(zip)); + const byPath = Object.fromEntries(files.map((f) => [f.path, f])); + + expect(byPath["logo.png"].isBinary).toBe(true); + expect(byPath["logo.png"].kind).toBe("resource"); + expect(byPath["logo.png"].content).toBe(""); + expect(byPath["logo.png"].language).toBeNull(); + expect(byPath["README.md"].isBinary).toBe(false); + expect(byPath["README.md"].content).toBe("# Readme"); + }); + + it("computes a stable hash matching hashBytes of the raw content", () => { + const content = "# Stable hash check"; + const zip = zipSync({ "SKILL.md": strToU8(content) }); + const files = parseZip(Buffer.from(zip)); + expect(files[0].hash).toBe(hashBytes(Buffer.from(content, "utf-8"))); + }); +}); + +describe("deriveScanName", () => { + it("uses the H1 heading from SKILL.md", () => { + const files = [ + file({ path: "skill/SKILL.md", content: "# My Awesome Skill\n\nbody" }), + ]; + expect(deriveScanName(files, "fallback")).toBe("My Awesome Skill"); + }); + + it("falls back to the name: front-matter when there is no H1", () => { + const files = [ + file({ + path: "SKILL.md", + content: "---\nname: Front Matter Skill\n---\nbody", + }), + ]; + expect(deriveScanName(files, "fallback")).toBe("Front Matter Skill"); + }); + + it("strips surrounding quotes from front-matter names", () => { + const files = [ + file({ path: "SKILL.md", content: 'name: "Quoted Name"\n' }), + ]; + expect(deriveScanName(files, "fallback")).toBe("Quoted Name"); + }); + + it("prefers the H1 over the front-matter name", () => 
{ + const files = [ + file({ + path: "SKILL.md", + content: "---\nname: Front Matter\n---\n# Heading Wins\n", + }), + ]; + expect(deriveScanName(files, "fallback")).toBe("Heading Wins"); + }); + + it("falls back to the top-level directory when SKILL.md has no title", () => { + const files = [ + file({ path: "my-skill/scripts/run.sh", content: "echo hi" }), + ]; + expect(deriveScanName(files, "fallback")).toBe("my-skill"); + }); + + it("uses the provided fallback when nothing else is available", () => { + expect(deriveScanName([], "the-fallback")).toBe("the-fallback"); + }); + + it("truncates very long names to 120 characters", () => { + const long = "x".repeat(200); + const files = [file({ path: "SKILL.md", content: `# ${long}` })]; + expect(deriveScanName(files, "fallback")).toHaveLength(120); + }); +});