Add unit tests for the skill upload parser

Task #18: Automatically test that uploaded skill files are read correctly.

The skill parser (artifacts/api-server/src/lib/skillParser.ts) had no automated
tests. A regression there could silently mis-read uploads. Added a new Vitest
suite covering the parsing/classification logic (NOT the ZIP size/safety limits,
which are tracked by a separate task).

New file: artifacts/api-server/src/lib/skillParser.test.ts

Coverage:
- parseSingleFile: kind/language/hash/size/isBinary for .md, .sh, .py, .json,
  .txt, unknown ext, and a binary blob; path normalisation (dir strip,
  backslashes); case-insensitive SKILL.md.
- parseText: wraps pasted text as a markdown SKILL.md instruction; size is the
  UTF-8 byte length (not the character count) for multi-byte content.
- parseZip (in-memory ZIP via fflate.zipSync): correct classification, nested
  path preservation, __MACOSX/.git/node_modules skipping, dir/empty entry
  skipping, binary-vs-text handling, stable hashing.
- deriveScanName: H1 from SKILL.md, `name:` front-matter fallback, quote
  stripping, H1 preferred over front-matter, top-level directory fallback,
  caller-provided fallback, 120-char truncation.

Verification: `pnpm --filter @workspace/api-server run test` → 59 passed
(24 new). Typecheck of the new test file is clean; pre-existing typecheck
errors in src/routes/scans.ts are unrelated and out of scope.

Replit-Task-Id: 06f18e6a-2d8d-4bf2-b2ae-29675f04c059
This commit is contained in:
amertensreplit 2026-06-10 19:53:15 +00:00
parent 532f42117f
commit 769c78aaef

View file

@@ -0,0 +1,244 @@
import { describe, it, expect } from "vitest";
import { zipSync, strToU8 } from "fflate";
import {
parseSingleFile,
parseText,
parseZip,
deriveScanName,
} from "./skillParser";
import { hashBytes } from "./skillFingerprint";
import type { ParsedFile } from "./ruleCatalog";
/**
 * Builds a `ParsedFile` fixture for tests.
 *
 * The defaults describe an empty, non-binary markdown instruction file at
 * path `SKILL.md`; any field present in `overrides` replaces the default.
 *
 * @param overrides - Fields to set on top of the defaults.
 * @returns A fresh object combining the defaults with `overrides`.
 */
function file(overrides: Partial<ParsedFile>): ParsedFile {
  const defaults: ParsedFile = {
    path: "SKILL.md",
    kind: "instruction",
    language: "markdown",
    content: "",
    size: 0,
    hash: "",
    isBinary: false,
  };
  // Overrides are spread last so they win over the defaults.
  return { ...defaults, ...overrides };
}
// Single-file uploads: checks the kind/language classification per extension,
// the binary flag, size/hash bookkeeping, and filename normalisation.
describe("parseSingleFile", () => {
  // Encodes a string fixture as the UTF-8 buffer handed to the parser.
  const utf8 = (text: string): Buffer => Buffer.from(text, "utf-8");

  it("classifies a markdown file as instruction", () => {
    const source = "# Hello\n\nsome text";
    const bytes = utf8(source);

    const result = parseSingleFile("SKILL.md", bytes);

    expect(result.path).toBe("SKILL.md");
    expect(result.kind).toBe("instruction");
    expect(result.language).toBe("markdown");
    expect(result.isBinary).toBe(false);
    expect(result.content).toBe(source);
    // Size and hash are compared against the raw uploaded bytes.
    expect(result.size).toBe(bytes.length);
    expect(result.hash).toBe(hashBytes(bytes));
  });

  it("classifies a shell script as a script with shell language", () => {
    const { kind, language, isBinary } = parseSingleFile(
      "run.sh",
      utf8("#!/bin/sh\necho hi"),
    );

    expect(kind).toBe("script");
    expect(language).toBe("shell");
    expect(isBinary).toBe(false);
  });

  it("classifies a python script as a script with python language", () => {
    const { kind, language } = parseSingleFile("main.py", utf8("print('hi')"));

    expect(kind).toBe("script");
    expect(language).toBe("python");
  });

  it("classifies a json file as a resource", () => {
    const { kind, language } = parseSingleFile("data.json", utf8("{}"));

    expect(kind).toBe("resource");
    expect(language).toBe("json");
  });

  it("treats a .txt file as an instruction", () => {
    const { kind, language } = parseSingleFile("notes.txt", utf8("hello"));

    expect(kind).toBe("instruction");
    expect(language).toBe("text");
  });

  it("leaves language null for an unknown extension", () => {
    const { kind, language } = parseSingleFile("image.xyz", utf8("plain"));

    expect(kind).toBe("resource");
    expect(language).toBeNull();
  });

  it("detects binary content and stores no text content", () => {
    // Byte values chosen as a non-text payload (includes NUL and 0xff/0xfe).
    const bytes = Buffer.from([0x00, 0x01, 0x02, 0xff, 0xfe, 0x00]);

    const result = parseSingleFile("blob.bin", bytes);

    expect(result.isBinary).toBe(true);
    expect(result.kind).toBe("resource");
    expect(result.language).toBeNull();
    expect(result.content).toBe("");
    expect(result.size).toBe(bytes.length);
    expect(result.hash).toBe(hashBytes(bytes));
  });

  it("strips directory components from the filename", () => {
    const { path } = parseSingleFile("some/dir/run.sh", utf8("x"));

    expect(path).toBe("run.sh");
  });

  it("normalises backslash paths", () => {
    const { path } = parseSingleFile("a\\b\\run.sh", utf8("x"));

    expect(path).toBe("run.sh");
  });

  it("treats SKILL.md as instruction regardless of casing", () => {
    const { kind } = parseSingleFile("skill.md", utf8("# Title"));

    expect(kind).toBe("instruction");
  });
});
// Pasted-text uploads: the text is expected to come back as a SKILL.md
// markdown instruction whose size/hash are taken over its UTF-8 bytes.
describe("parseText", () => {
  it("wraps pasted text as a markdown SKILL.md instruction", () => {
    const pasted = "# Pasted Skill\n\nbody";
    const encoded = Buffer.from(pasted, "utf-8");

    const result = parseText(pasted);

    expect(result.path).toBe("SKILL.md");
    expect(result.kind).toBe("instruction");
    expect(result.language).toBe("markdown");
    expect(result.isBinary).toBe(false);
    expect(result.content).toBe(pasted);
    // The encoded buffer's length is the UTF-8 byte length of the text.
    expect(result.size).toBe(encoded.length);
    expect(result.hash).toBe(hashBytes(encoded));
  });

  it("counts byte length (not character length) for multi-byte content", () => {
    const accented = "café";
    const byteCount = Buffer.byteLength(accented, "utf-8");

    const { size } = parseText(accented);

    expect(size).toBe(byteCount);
    // The accented character takes more than one byte in UTF-8.
    expect(size).toBeGreaterThan(accented.length);
  });
});
// ZIP uploads: archives are built in memory with fflate's zipSync and fed to
// parseZip; the tests check classification, path handling, entry skipping,
// binary handling and hashing.
describe("parseZip", () => {
  // Zips the given entries in memory and runs the result through parseZip.
  const parseArchive = (entries: Record<string, Uint8Array>) =>
    parseZip(Buffer.from(zipSync(entries)));

  it("extracts text and script files with correct classification", () => {
    const parsed = parseArchive({
      "SKILL.md": strToU8("# My Skill\n"),
      "scripts/run.sh": strToU8("#!/bin/sh\necho hi\n"),
      "data.json": strToU8('{"a":1}'),
    });
    const lookup = Object.fromEntries(parsed.map((entry) => [entry.path, entry]));

    expect(parsed).toHaveLength(3);

    const skill = lookup["SKILL.md"];
    expect(skill.kind).toBe("instruction");
    expect(skill.language).toBe("markdown");

    const script = lookup["scripts/run.sh"];
    expect(script.kind).toBe("script");
    expect(script.language).toBe("shell");

    const data = lookup["data.json"];
    expect(data.kind).toBe("resource");
    expect(data.language).toBe("json");
  });

  it("normalises nested paths and preserves directory structure", () => {
    const parsed = parseArchive({
      "skill/nested/deep/notes.md": strToU8("notes"),
    });

    expect(parsed).toHaveLength(1);
    const [only] = parsed;
    expect(only.path).toBe("skill/nested/deep/notes.md");
    expect(only.kind).toBe("instruction");
  });

  it("skips __MACOSX, .git and node_modules entries", () => {
    const parsed = parseArchive({
      "SKILL.md": strToU8("# Skill"),
      "__MACOSX/._SKILL.md": strToU8("junk"),
      ".git/config": strToU8("[core]"),
      "node_modules/dep/index.js": strToU8("module.exports = 1"),
    });

    const paths = parsed.map((entry) => entry.path);
    expect(paths).toEqual(["SKILL.md"]);
  });

  it("ignores directory entries and empty files", () => {
    const parsed = parseArchive({
      "emptydir/": strToU8(""),
      "empty.txt": strToU8(""),
      "real.md": strToU8("content"),
    });

    const paths = parsed.map((entry) => entry.path);
    expect(paths).toEqual(["real.md"]);
  });

  it("handles binary content inside the archive", () => {
    const rawBytes = new Uint8Array([0x00, 0x01, 0x02, 0x03, 0xff, 0x00, 0xfe]);
    const parsed = parseArchive({
      "logo.png": rawBytes,
      "README.md": strToU8("# Readme"),
    });
    const lookup = Object.fromEntries(parsed.map((entry) => [entry.path, entry]));

    const logo = lookup["logo.png"];
    expect(logo.isBinary).toBe(true);
    expect(logo.kind).toBe("resource");
    expect(logo.content).toBe("");
    expect(logo.language).toBeNull();

    const readme = lookup["README.md"];
    expect(readme.isBinary).toBe(false);
    expect(readme.content).toBe("# Readme");
  });

  it("computes a stable hash matching hashBytes of the raw content", () => {
    const body = "# Stable hash check";
    const expectedHash = hashBytes(Buffer.from(body, "utf-8"));

    const parsed = parseArchive({ "SKILL.md": strToU8(body) });

    expect(parsed[0].hash).toBe(expectedHash);
  });
});
// Scan naming: checks which source deriveScanName picks the name from (H1,
// `name:` line, top-level directory, caller fallback) and the length cap.
describe("deriveScanName", () => {
  it("uses the H1 heading from SKILL.md", () => {
    const skill = file({
      path: "skill/SKILL.md",
      content: "# My Awesome Skill\n\nbody",
    });

    expect(deriveScanName([skill], "fallback")).toBe("My Awesome Skill");
  });

  it("falls back to the name: front-matter when there is no H1", () => {
    const skill = file({
      path: "SKILL.md",
      content: "---\nname: Front Matter Skill\n---\nbody",
    });

    expect(deriveScanName([skill], "fallback")).toBe("Front Matter Skill");
  });

  it("strips surrounding quotes from front-matter names", () => {
    const skill = file({ path: "SKILL.md", content: 'name: "Quoted Name"\n' });

    expect(deriveScanName([skill], "fallback")).toBe("Quoted Name");
  });

  it("prefers the H1 over the front-matter name", () => {
    const skill = file({
      path: "SKILL.md",
      content: "---\nname: Front Matter\n---\n# Heading Wins\n",
    });

    expect(deriveScanName([skill], "fallback")).toBe("Heading Wins");
  });

  it("falls back to the top-level directory when SKILL.md has no title", () => {
    // The only entry is a script under my-skill/; no SKILL.md is supplied.
    const script = file({ path: "my-skill/scripts/run.sh", content: "echo hi" });

    expect(deriveScanName([script], "fallback")).toBe("my-skill");
  });

  it("uses the provided fallback when nothing else is available", () => {
    const derived = deriveScanName([], "the-fallback");

    expect(derived).toBe("the-fallback");
  });

  it("truncates very long names to 120 characters", () => {
    // A 200-character heading, well past the expected 120-character cap.
    const heading = "# " + "x".repeat(200);
    const skill = file({ path: "SKILL.md", content: heading });

    expect(deriveScanName([skill], "fallback")).toHaveLength(120);
  });
});