Add unit tests for the skill upload parser

Task #18: Automatically test that uploaded skill files are read correctly.

The skill parser (artifacts/api-server/src/lib/skillParser.ts) had no automated tests, so a regression there could silently mis-read uploads. Added a new Vitest suite covering the parsing/classification logic (NOT the ZIP size/safety limits, which are tracked by a separate task).

New file: artifacts/api-server/src/lib/skillParser.test.ts

Coverage:
- parseSingleFile: kind/language/hash/size/isBinary for .md, .sh, .py, .json, .txt, unknown ext, and a binary blob; path normalisation (dir strip, backslashes); case-insensitive SKILL.md.
- parseText: wraps pasted text as markdown SKILL.md; byte-length sizing for multi-byte content.
- parseZip (in-memory ZIP via fflate.zipSync): correct classification, nested path preservation, __MACOSX/.git/node_modules skipping, dir/empty entry skipping, binary-vs-text handling, stable hashing.
- deriveScanName: H1 from SKILL.md, name: front-matter fallback, quote stripping, H1 preferred over front-matter, top-dir fallback, provided fallback, 120-char truncation.

Verification: `pnpm --filter @workspace/api-server run test` → 59 passed (24 new). Typecheck of the new test file is clean; pre-existing typecheck errors in src/routes/scans.ts are unrelated and out of scope.

Replit-Task-Id: 06f18e6a-2d8d-4bf2-b2ae-29675f04c059
2026-06-10 19:53:15 +00:00 · 2026-06-10 19:53:15 +00:00 · 769c78aaef
commit 769c78aaef
parent 532f42117f
1 changed files with 244 additions and 0 deletions
--- a/artifacts/api-server/src/lib/skillParser.test.ts
+++ b/artifacts/api-server/src/lib/skillParser.test.ts
@ -0,0 +1,244 @@
+import { describe, it, expect } from "vitest";
+import { zipSync, strToU8 } from "fflate";
+import {
+  parseSingleFile,
+  parseText,
+  parseZip,
+  deriveScanName,
+} from "./skillParser";
+import { hashBytes } from "./skillFingerprint";
+import type { ParsedFile } from "./ruleCatalog";
+
+/**
+ * Builds a ParsedFile fixture for the tests in this file.
+ *
+ * The defaults describe an empty, non-binary markdown instruction file at
+ * "SKILL.md"; any field present in `overrides` replaces the matching default.
+ */
+function file(overrides: Partial<ParsedFile>): ParsedFile {
+  const defaults: ParsedFile = {
+    path: "SKILL.md",
+    kind: "instruction",
+    language: "markdown",
+    content: "",
+    size: 0,
+    hash: "",
+    isBinary: false,
+  };
+  // Later spread wins, so caller-supplied fields take precedence.
+  return { ...defaults, ...overrides };
+}
+
+/**
+ * parseSingleFile: kind/language classification, binary detection, size and
+ * hash bookkeeping, and filename normalisation for one uploaded file.
+ */
+describe("parseSingleFile", () => {
+  // All text fixtures in this suite are encoded as UTF-8.
+  const utf8 = (text: string): Buffer => Buffer.from(text, "utf-8");
+
+  it("classifies a markdown file as instruction", () => {
+    const source = "# Hello\n\nsome text";
+    const bytes = utf8(source);
+    const result = parseSingleFile("SKILL.md", bytes);
+
+    expect(result.path).toBe("SKILL.md");
+    expect(result.kind).toBe("instruction");
+    expect(result.language).toBe("markdown");
+    expect(result.isBinary).toBe(false);
+    expect(result.content).toBe(source);
+    // Size and hash are derived from the raw bytes handed to the parser.
+    expect(result.size).toBe(bytes.length);
+    expect(result.hash).toBe(hashBytes(bytes));
+  });
+
+  it("classifies a shell script as a script with shell language", () => {
+    const result = parseSingleFile("run.sh", utf8("#!/bin/sh\necho hi"));
+    expect(result.kind).toBe("script");
+    expect(result.language).toBe("shell");
+    expect(result.isBinary).toBe(false);
+  });
+
+  it("classifies a python script as a script with python language", () => {
+    const result = parseSingleFile("main.py", utf8("print('hi')"));
+    expect(result.kind).toBe("script");
+    expect(result.language).toBe("python");
+  });
+
+  it("classifies a json file as a resource", () => {
+    const result = parseSingleFile("data.json", utf8("{}"));
+    expect(result.kind).toBe("resource");
+    expect(result.language).toBe("json");
+  });
+
+  it("treats a .txt file as an instruction", () => {
+    const result = parseSingleFile("notes.txt", utf8("hello"));
+    expect(result.kind).toBe("instruction");
+    expect(result.language).toBe("text");
+  });
+
+  it("leaves language null for an unknown extension", () => {
+    const result = parseSingleFile("image.xyz", utf8("plain"));
+    expect(result.kind).toBe("resource");
+    expect(result.language).toBeNull();
+  });
+
+  it("detects binary content and stores no text content", () => {
+    // Payload with NUL and 0xff/0xfe bytes; expected to be flagged as binary.
+    const raw = Buffer.from([0x00, 0x01, 0x02, 0xff, 0xfe, 0x00]);
+    const result = parseSingleFile("blob.bin", raw);
+
+    expect(result.isBinary).toBe(true);
+    expect(result.kind).toBe("resource");
+    expect(result.language).toBeNull();
+    expect(result.content).toBe("");
+    expect(result.size).toBe(raw.length);
+    expect(result.hash).toBe(hashBytes(raw));
+  });
+
+  it("strips directory components from the filename", () => {
+    const { path } = parseSingleFile("some/dir/run.sh", utf8("x"));
+    expect(path).toBe("run.sh");
+  });
+
+  it("normalises backslash paths", () => {
+    const { path } = parseSingleFile("a\\b\\run.sh", utf8("x"));
+    expect(path).toBe("run.sh");
+  });
+
+  // NOTE(review): the parseZip suite in this file shows a non-SKILL markdown
+  // path (notes.md) also classified as "instruction", so this case may not
+  // isolate the SKILL.md casing rule on its own - confirm against the parser.
+  it("treats SKILL.md as instruction regardless of casing", () => {
+    const { kind } = parseSingleFile("skill.md", utf8("# Title"));
+    expect(kind).toBe("instruction");
+  });
+});
+
+/**
+ * parseText: pasted text is expected to come back as a markdown SKILL.md
+ * instruction whose size and hash reflect its UTF-8 encoding.
+ */
+describe("parseText", () => {
+  it("wraps pasted text as a markdown SKILL.md instruction", () => {
+    const pasted = "# Pasted Skill\n\nbody";
+    const encoded = Buffer.from(pasted, "utf-8");
+    const { path, kind, language, isBinary, content, size, hash } =
+      parseText(pasted);
+
+    expect(path).toBe("SKILL.md");
+    expect(kind).toBe("instruction");
+    expect(language).toBe("markdown");
+    expect(isBinary).toBe(false);
+    expect(content).toBe(pasted);
+    expect(size).toBe(Buffer.byteLength(pasted, "utf-8"));
+    expect(hash).toBe(hashBytes(encoded));
+  });
+
+  it("counts byte length (not character length) for multi-byte content", () => {
+    // "é" takes more than one byte in UTF-8, so bytes must exceed characters.
+    const accented = "café";
+    const { size } = parseText(accented);
+
+    expect(size).toBe(Buffer.byteLength(accented, "utf-8"));
+    expect(size).toBeGreaterThan(accented.length);
+  });
+});
+
+/**
+ * parseZip: archives are built in memory with fflate's zipSync and fed to the
+ * parser as a Buffer; cases cover classification, path handling, skipped
+ * entries, binary detection and hashing.
+ */
+describe("parseZip", () => {
+  // Zips the given entries in memory and runs the result through parseZip.
+  const parseArchive = (entries: Parameters<typeof zipSync>[0]) =>
+    parseZip(Buffer.from(zipSync(entries)));
+
+  // Indexes parsed files by their path for direct lookup in assertions.
+  const indexByPath = (parsed: ReturnType<typeof parseZip>) =>
+    Object.fromEntries(parsed.map((entry) => [entry.path, entry]));
+
+  it("extracts text and script files with correct classification", () => {
+    const parsed = parseArchive({
+      "SKILL.md": strToU8("# My Skill\n"),
+      "scripts/run.sh": strToU8("#!/bin/sh\necho hi\n"),
+      "data.json": strToU8("{\"a\":1}"),
+    });
+    const lookup = indexByPath(parsed);
+
+    expect(parsed).toHaveLength(3);
+    expect(lookup["SKILL.md"].kind).toBe("instruction");
+    expect(lookup["SKILL.md"].language).toBe("markdown");
+    expect(lookup["scripts/run.sh"].kind).toBe("script");
+    expect(lookup["scripts/run.sh"].language).toBe("shell");
+    expect(lookup["data.json"].kind).toBe("resource");
+    expect(lookup["data.json"].language).toBe("json");
+  });
+
+  it("normalises nested paths and preserves directory structure", () => {
+    const nestedPath = "skill/nested/deep/notes.md";
+    const parsed = parseArchive({ [nestedPath]: strToU8("notes") });
+
+    expect(parsed).toHaveLength(1);
+    expect(parsed[0].path).toBe(nestedPath);
+    expect(parsed[0].kind).toBe("instruction");
+  });
+
+  it("skips __MACOSX, .git and node_modules entries", () => {
+    const parsed = parseArchive({
+      "SKILL.md": strToU8("# Skill"),
+      "__MACOSX/._SKILL.md": strToU8("junk"),
+      ".git/config": strToU8("[core]"),
+      "node_modules/dep/index.js": strToU8("module.exports = 1"),
+    });
+    const paths = parsed.map((entry) => entry.path);
+
+    expect(paths).toEqual(["SKILL.md"]);
+  });
+
+  it("ignores directory entries and empty files", () => {
+    const parsed = parseArchive({
+      "emptydir/": strToU8(""),
+      "empty.txt": strToU8(""),
+      "real.md": strToU8("content"),
+    });
+    const paths = parsed.map((entry) => entry.path);
+
+    expect(paths).toEqual(["real.md"]);
+  });
+
+  it("handles binary content inside the archive", () => {
+    const pngBytes = new Uint8Array([0x00, 0x01, 0x02, 0x03, 0xff, 0x00, 0xfe]);
+    const parsed = parseArchive({
+      "logo.png": pngBytes,
+      "README.md": strToU8("# Readme"),
+    });
+    const lookup = indexByPath(parsed);
+    const logo = lookup["logo.png"];
+    const readme = lookup["README.md"];
+
+    expect(logo.isBinary).toBe(true);
+    expect(logo.kind).toBe("resource");
+    expect(logo.content).toBe("");
+    expect(logo.language).toBeNull();
+    expect(readme.isBinary).toBe(false);
+    expect(readme.content).toBe("# Readme");
+  });
+
+  it("computes a stable hash matching hashBytes of the raw content", () => {
+    const body = "# Stable hash check";
+    const [first] = parseArchive({ "SKILL.md": strToU8(body) });
+
+    // The hash is compared against hashBytes of the uncompressed entry bytes.
+    expect(first.hash).toBe(hashBytes(Buffer.from(body, "utf-8")));
+  });
+});
+
+/**
+ * deriveScanName: checks which source the scan name is taken from (H1,
+ * front-matter name, top-level directory, caller-supplied fallback) and that
+ * long names are cut to 120 characters.
+ */
+describe("deriveScanName", () => {
+  const FALLBACK = "fallback";
+
+  // Single-file fixture list holding one markdown instruction file.
+  const skillDoc = (content: string, path = "SKILL.md"): ParsedFile[] => [
+    file({ path, content }),
+  ];
+
+  it("uses the H1 heading from SKILL.md", () => {
+    const inputs = skillDoc("# My Awesome Skill\n\nbody", "skill/SKILL.md");
+    expect(deriveScanName(inputs, FALLBACK)).toBe("My Awesome Skill");
+  });
+
+  it("falls back to the name: front-matter when there is no H1", () => {
+    const inputs = skillDoc("---\nname: Front Matter Skill\n---\nbody");
+    expect(deriveScanName(inputs, FALLBACK)).toBe("Front Matter Skill");
+  });
+
+  it("strips surrounding quotes from front-matter names", () => {
+    const inputs = skillDoc('name: "Quoted Name"\n');
+    expect(deriveScanName(inputs, FALLBACK)).toBe("Quoted Name");
+  });
+
+  it("prefers the H1 over the front-matter name", () => {
+    const inputs = skillDoc("---\nname: Front Matter\n---\n# Heading Wins\n");
+    expect(deriveScanName(inputs, FALLBACK)).toBe("Heading Wins");
+  });
+
+  // NOTE(review): this fixture contains no SKILL.md at all, and the run.sh
+  // entry inherits kind "instruction" / language "markdown" from file()'s
+  // defaults - confirm that matches the scenario named in the title.
+  it("falls back to the top-level directory when SKILL.md has no title", () => {
+    const inputs = [
+      file({ path: "my-skill/scripts/run.sh", content: "echo hi" }),
+    ];
+    expect(deriveScanName(inputs, FALLBACK)).toBe("my-skill");
+  });
+
+  it("uses the provided fallback when nothing else is available", () => {
+    const noFiles: ParsedFile[] = [];
+    expect(deriveScanName(noFiles, "the-fallback")).toBe("the-fallback");
+  });
+
+  it("truncates very long names to 120 characters", () => {
+    // 200-character heading, well past the expected 120-character cap.
+    const heading = "x".repeat(200);
+    const inputs = skillDoc("# " + heading);
+    expect(deriveScanName(inputs, FALLBACK)).toHaveLength(120);
+  });
+});