Add unit tests for the skill upload parser
Task #18: Automatically test that uploaded skill files are read correctly.

The skill parser (artifacts/api-server/src/lib/skillParser.ts) had no automated tests. A regression there could silently mis-read uploads. Added a new Vitest suite covering the parsing/classification logic (NOT the ZIP size/safety limits, which are tracked by a separate task).

New file: artifacts/api-server/src/lib/skillParser.test.ts

Coverage:
- parseSingleFile: kind/language/hash/size/isBinary for .md, .sh, .py, .json, .txt, unknown ext, and a binary blob; path normalisation (dir strip, backslashes); case-insensitive SKILL.md.
- parseText: wraps pasted text as markdown SKILL.md; byte-length sizing for multi-byte content.
- parseZip (in-memory ZIP via fflate.zipSync): correct classification, nested path preservation, __MACOSX/.git/node_modules skipping, dir/empty entry skipping, binary-vs-text handling, stable hashing.
- deriveScanName: H1 from SKILL.md, name: front-matter fallback, quote stripping, H1 preferred over front-matter, top-dir fallback, provided fallback, 120-char truncation.

Verification: `pnpm --filter @workspace/api-server run test` → 59 passed (24 new). Typecheck of the new test file is clean; pre-existing typecheck errors in src/routes/scans.ts are unrelated and out of scope.

Replit-Task-Id: 06f18e6a-2d8d-4bf2-b2ae-29675f04c059
This commit is contained in:
parent
532f42117f
commit
769c78aaef
1 changed files with 244 additions and 0 deletions
244
artifacts/api-server/src/lib/skillParser.test.ts
Normal file
244
artifacts/api-server/src/lib/skillParser.test.ts
Normal file
|
|
@ -0,0 +1,244 @@
|
||||||
|
import { describe, it, expect } from "vitest";
|
||||||
|
import { zipSync, strToU8 } from "fflate";
|
||||||
|
import {
|
||||||
|
parseSingleFile,
|
||||||
|
parseText,
|
||||||
|
parseZip,
|
||||||
|
deriveScanName,
|
||||||
|
} from "./skillParser";
|
||||||
|
import { hashBytes } from "./skillFingerprint";
|
||||||
|
import type { ParsedFile } from "./ruleCatalog";
|
||||||
|
|
||||||
|
/**
 * Test fixture factory for `ParsedFile`.
 *
 * Starts from an empty, non-binary markdown `SKILL.md` instruction file and
 * lets the caller replace any subset of fields.
 *
 * @param overrides Fields that should differ from the defaults.
 * @returns The default fixture with `overrides` applied on top.
 */
function file(overrides: Partial<ParsedFile>): ParsedFile {
  const defaults: ParsedFile = {
    path: "SKILL.md",
    kind: "instruction",
    language: "markdown",
    content: "",
    size: 0,
    hash: "",
    isBinary: false,
  };
  // Later spread wins, so caller-supplied fields replace the defaults.
  return { ...defaults, ...overrides };
}
|
||||||
|
|
||||||
|
/**
 * Single-file uploads: checks the kind/language classification per
 * extension, binary detection, size/hash bookkeeping and filename
 * normalisation reported by `parseSingleFile`.
 */
describe("parseSingleFile", () => {
  // Text fixtures are handed to the parser as UTF-8 encoded bytes.
  const utf8 = (text: string): Buffer => Buffer.from(text, "utf-8");

  it("classifies a markdown file as instruction", () => {
    const source = "# Hello\n\nsome text";
    const bytes = utf8(source);
    const result = parseSingleFile("SKILL.md", bytes);

    expect(result.path).toBe("SKILL.md");
    expect(result.kind).toBe("instruction");
    expect(result.language).toBe("markdown");
    expect(result.isBinary).toBe(false);
    expect(result.content).toBe(source);
    // Size and hash are expected to match the raw uploaded bytes.
    expect(result.size).toBe(bytes.length);
    expect(result.hash).toBe(hashBytes(bytes));
  });

  it("classifies a shell script as a script with shell language", () => {
    const result = parseSingleFile("run.sh", utf8("#!/bin/sh\necho hi"));

    expect(result.kind).toBe("script");
    expect(result.language).toBe("shell");
    expect(result.isBinary).toBe(false);
  });

  it("classifies a python script as a script with python language", () => {
    const result = parseSingleFile("main.py", utf8("print('hi')"));

    expect(result.kind).toBe("script");
    expect(result.language).toBe("python");
  });

  it("classifies a json file as a resource", () => {
    const result = parseSingleFile("data.json", utf8("{}"));

    expect(result.kind).toBe("resource");
    expect(result.language).toBe("json");
  });

  it("treats a .txt file as an instruction", () => {
    const result = parseSingleFile("notes.txt", utf8("hello"));

    expect(result.kind).toBe("instruction");
    expect(result.language).toBe("text");
  });

  it("leaves language null for an unknown extension", () => {
    const result = parseSingleFile("image.xyz", utf8("plain"));

    expect(result.kind).toBe("resource");
    expect(result.language).toBeNull();
  });

  it("detects binary content and stores no text content", () => {
    // Bytes including NULs and 0xff/0xfe, used here as the binary sample.
    const bytes = Buffer.from([0x00, 0x01, 0x02, 0xff, 0xfe, 0x00]);
    const result = parseSingleFile("blob.bin", bytes);

    expect(result.isBinary).toBe(true);
    expect(result.kind).toBe("resource");
    expect(result.language).toBeNull();
    expect(result.content).toBe("");
    expect(result.size).toBe(bytes.length);
    expect(result.hash).toBe(hashBytes(bytes));
  });

  it("strips directory components from the filename", () => {
    const { path } = parseSingleFile("some/dir/run.sh", utf8("x"));

    expect(path).toBe("run.sh");
  });

  it("normalises backslash paths", () => {
    const { path } = parseSingleFile("a\\b\\run.sh", utf8("x"));

    expect(path).toBe("run.sh");
  });

  it("treats SKILL.md as instruction regardless of casing", () => {
    const { kind } = parseSingleFile("skill.md", utf8("# Title"));

    expect(kind).toBe("instruction");
  });
});
|
||||||
|
|
||||||
|
/**
 * Pasted-text uploads: `parseText` is expected to present the text as a
 * markdown `SKILL.md` instruction file, sized and hashed by its UTF-8 bytes.
 */
describe("parseText", () => {
  it("wraps pasted text as a markdown SKILL.md instruction", () => {
    const pasted = "# Pasted Skill\n\nbody";
    const expectedBytes = Buffer.from(pasted, "utf-8");
    const result = parseText(pasted);

    expect(result.path).toBe("SKILL.md");
    expect(result.kind).toBe("instruction");
    expect(result.language).toBe("markdown");
    expect(result.isBinary).toBe(false);
    expect(result.content).toBe(pasted);
    expect(result.size).toBe(Buffer.byteLength(pasted, "utf-8"));
    expect(result.hash).toBe(hashBytes(expectedBytes));
  });

  it("counts byte length (not character length) for multi-byte content", () => {
    // "é" takes two bytes in UTF-8, so byte length exceeds string length.
    const pasted = "café";
    const { size } = parseText(pasted);

    expect(size).toBe(Buffer.byteLength(pasted, "utf-8"));
    expect(size).toBeGreaterThan(pasted.length);
  });
});
|
||||||
|
|
||||||
|
/**
 * ZIP uploads: archives are built in memory with fflate's `zipSync` and fed
 * to `parseZip`; the tests check classification, path handling, skipped
 * entries, binary handling and hashing of the extracted files.
 */
describe("parseZip", () => {
  // Zips the given entries in memory and runs the result through the parser.
  const parseArchive = (entries: Record<string, Uint8Array>) =>
    parseZip(Buffer.from(zipSync(entries)));

  // Keys parsed files by their path for direct lookup in assertions.
  const indexByPath = <T extends { path: string }>(
    parsed: readonly T[],
  ): Record<string, T> =>
    Object.fromEntries(parsed.map((entry) => [entry.path, entry] as const));

  it("extracts text and script files with correct classification", () => {
    const parsed = parseArchive({
      "SKILL.md": strToU8("# My Skill\n"),
      "scripts/run.sh": strToU8("#!/bin/sh\necho hi\n"),
      "data.json": strToU8("{\"a\":1}"),
    });
    const lookup = indexByPath(parsed);

    expect(parsed).toHaveLength(3);
    expect(lookup["SKILL.md"].kind).toBe("instruction");
    expect(lookup["SKILL.md"].language).toBe("markdown");
    expect(lookup["scripts/run.sh"].kind).toBe("script");
    expect(lookup["scripts/run.sh"].language).toBe("shell");
    expect(lookup["data.json"].kind).toBe("resource");
    expect(lookup["data.json"].language).toBe("json");
  });

  it("normalises nested paths and preserves directory structure", () => {
    const parsed = parseArchive({
      "skill/nested/deep/notes.md": strToU8("notes"),
    });

    expect(parsed).toHaveLength(1);
    const [only] = parsed;
    expect(only.path).toBe("skill/nested/deep/notes.md");
    expect(only.kind).toBe("instruction");
  });

  it("skips __MACOSX, .git and node_modules entries", () => {
    const parsed = parseArchive({
      "SKILL.md": strToU8("# Skill"),
      "__MACOSX/._SKILL.md": strToU8("junk"),
      ".git/config": strToU8("[core]"),
      "node_modules/dep/index.js": strToU8("module.exports = 1"),
    });
    const paths = parsed.map((entry) => entry.path);

    expect(paths).toEqual(["SKILL.md"]);
  });

  it("ignores directory entries and empty files", () => {
    const parsed = parseArchive({
      "emptydir/": strToU8(""),
      "empty.txt": strToU8(""),
      "real.md": strToU8("content"),
    });
    const paths = parsed.map((entry) => entry.path);

    expect(paths).toEqual(["real.md"]);
  });

  it("handles binary content inside the archive", () => {
    const blob = new Uint8Array([0x00, 0x01, 0x02, 0x03, 0xff, 0x00, 0xfe]);
    const lookup = indexByPath(
      parseArchive({
        "logo.png": blob,
        "README.md": strToU8("# Readme"),
      }),
    );

    const logo = lookup["logo.png"];
    expect(logo.isBinary).toBe(true);
    expect(logo.kind).toBe("resource");
    expect(logo.content).toBe("");
    expect(logo.language).toBeNull();

    const readme = lookup["README.md"];
    expect(readme.isBinary).toBe(false);
    expect(readme.content).toBe("# Readme");
  });

  it("computes a stable hash matching hashBytes of the raw content", () => {
    const body = "# Stable hash check";
    const parsed = parseArchive({ "SKILL.md": strToU8(body) });
    const expectedHash = hashBytes(Buffer.from(body, "utf-8"));

    expect(parsed[0].hash).toBe(expectedHash);
  });
});
|
||||||
|
|
||||||
|
/**
 * Scan naming: checks which source `deriveScanName` picks for the display
 * name (H1 heading, `name:` front-matter, top-level directory, caller
 * fallback) and that long names are cut to 120 characters.
 */
describe("deriveScanName", () => {
  // Builds a one-element file list holding a skill document with `content`.
  const skillDoc = (content: string, path = "SKILL.md") => [
    file({ path, content }),
  ];

  it("uses the H1 heading from SKILL.md", () => {
    const inputs = skillDoc("# My Awesome Skill\n\nbody", "skill/SKILL.md");

    expect(deriveScanName(inputs, "fallback")).toBe("My Awesome Skill");
  });

  it("falls back to the name: front-matter when there is no H1", () => {
    const inputs = skillDoc("---\nname: Front Matter Skill\n---\nbody");

    expect(deriveScanName(inputs, "fallback")).toBe("Front Matter Skill");
  });

  it("strips surrounding quotes from front-matter names", () => {
    const inputs = skillDoc('name: "Quoted Name"\n');

    expect(deriveScanName(inputs, "fallback")).toBe("Quoted Name");
  });

  it("prefers the H1 over the front-matter name", () => {
    const inputs = skillDoc("---\nname: Front Matter\n---\n# Heading Wins\n");

    expect(deriveScanName(inputs, "fallback")).toBe("Heading Wins");
  });

  it("falls back to the top-level directory when SKILL.md has no title", () => {
    // Only a script under "my-skill/" is supplied; no SKILL.md is present.
    const inputs = [
      file({ path: "my-skill/scripts/run.sh", content: "echo hi" }),
    ];

    expect(deriveScanName(inputs, "fallback")).toBe("my-skill");
  });

  it("uses the provided fallback when nothing else is available", () => {
    const derived = deriveScanName([], "the-fallback");

    expect(derived).toBe("the-fallback");
  });

  it("truncates very long names to 120 characters", () => {
    const heading = "x".repeat(200);
    const derived = deriveScanName(skillDoc(`# ${heading}`), "fallback");

    expect(derived).toHaveLength(120);
  });
});
|
||||||
Loading…
Add table
Reference in a new issue