skillguard/artifacts/api-server/src/lib/skillParser.ts

import { unzipSync, strFromU8 } from "fflate";
import type { FileKind, ParsedFile } from "./ruleCatalog";

/**
 * Maps a lower-case file extension (without the dot, as produced by `extOf`)
 * to the language label stored in `ParsedFile.language`.
 * Extensions that are missing here end up as `null` at the call sites.
 */
const LANG_BY_EXT: Record<string, string> = {
  sh: "shell",
  bash: "shell",
  zsh: "shell",
  py: "python",
  js: "javascript",
  mjs: "javascript",
  cjs: "javascript",
  ts: "typescript",
  rb: "ruby",
  pl: "perl",
  php: "php",
  ps1: "powershell",
  go: "go",
  rs: "rust",
  md: "markdown",
  txt: "text",
  json: "json",
  yaml: "yaml",
  yml: "yaml",
  toml: "toml",
  env: "dotenv",
};

/**
 * Lower-case extensions that `classify` reports as kind "script".
 * Every entry also has a language label in `LANG_BY_EXT`.
 */
const SCRIPT_EXTS = new Set([
  "sh",
  "bash",
  "zsh",
  "py",
  "js",
  "mjs",
  "cjs",
  "ts",
  "rb",
  "pl",
  "php",
  "ps1",
  "go",
  "rs",
]);

/**
 * Directory names (lower-case, each with a trailing slash) whose contents
 * `parseZip` leaves out of the result.
 */
const SKIP_DIRS = ["__macosx/", ".git/", "node_modules/"];

// Limits applied by `parseZip` to uploaded archives.
// Maximum number of accepted (non-skipped, non-empty) files; exceeding it throws.
const MAX_ZIP_FILES = 2000;
// Maximum cumulative uncompressed size of accepted files (60 MiB); exceeding it throws.
const MAX_ZIP_TOTAL_BYTES = 60 * 1024 * 1024;
// Maximum uncompressed size of a single file (5 MiB); larger files are skipped, not rejected.
const MAX_ZIP_FILE_BYTES = 5 * 1024 * 1024;

/**
 * Extracts the extension of the last "/"-separated segment of `path`.
 *
 * @param path Forward-slash separated path or bare file name.
 * @returns The text after the final dot of the file name, lower-cased and
 *   without the dot; an empty string when the file name contains no dot.
 */
function extOf(path: string): string {
  // Everything after the last "/" (or the whole string when there is none).
  const fileName = path.slice(path.lastIndexOf("/") + 1);
  const dotAt = fileName.lastIndexOf(".");
  if (dotAt < 0) {
    return "";
  }
  return fileName.slice(dotAt + 1).toLowerCase();
}

/**
 * Assigns a `FileKind` to a file based on its name and extension.
 *
 * - A file named `skill.md` (any letter case) is always an "instruction".
 * - Files whose extension is listed in `SCRIPT_EXTS` are "script".
 * - Other `.md` / `.txt` files are "instruction".
 * - Everything else is "resource".
 *
 * @param path Forward-slash separated path or bare file name.
 */
function classify(path: string): FileKind {
  const fileName = path.slice(path.lastIndexOf("/") + 1).toLowerCase();
  if (fileName === "skill.md") {
    return "instruction";
  }

  const extension = extOf(path);
  if (SCRIPT_EXTS.has(extension)) {
    return "script";
  }

  const isPlainText = extension === "md" || extension === "txt";
  return isPlainText ? "instruction" : "resource";
}

/**
 * Heuristically decides whether `bytes` holds binary rather than text data.
 *
 * Only the first 4000 bytes are inspected. Any NUL byte marks the data as
 * binary immediately; otherwise the data counts as binary when more than 30%
 * of the inspected bytes are control characters other than 9..13
 * (tab, line feed, vertical tab, form feed, carriage return).
 *
 * @returns `false` for empty input.
 */
function isProbablyBinary(bytes: Uint8Array): boolean {
  const sample = bytes.subarray(0, 4000);
  if (sample.length === 0) {
    return false;
  }

  let controlCount = 0;
  for (const byte of sample) {
    if (byte === 0) {
      return true;
    }
    const isWhitespaceControl = byte >= 9 && byte <= 13;
    if (byte < 32 && !isWhitespaceControl) {
      controlCount += 1;
    }
  }
  return controlCount / sample.length > 0.3;
}

/** Normalises Windows-style separators so archive paths can be split on "/". */
function normalizeZipPath(rawPath: string): string {
  return rawPath.replace(/\\/g, "/");
}

/**
 * True for directory entries and for files located below a directory listed
 * in `SKIP_DIRS`. Whole path segments are compared, so a directory such as
 * `evil.git/` is NOT treated as `.git/` and stays part of the scan.
 */
function isSkippedZipPath(path: string): boolean {
  if (path.endsWith("/")) return true;
  const parentDirs = path.toLowerCase().split("/").slice(0, -1);
  return parentDirs.some((dir) => SKIP_DIRS.includes(`${dir}/`));
}

/**
 * Creates a stateful checker that accumulates file sizes and throws as soon
 * as `MAX_ZIP_TOTAL_BYTES` or `MAX_ZIP_FILES` is exceeded.
 */
function createZipBudget(): (size: number) => void {
  let totalBytes = 0;
  let fileCount = 0;
  return (size) => {
    totalBytes += size;
    if (totalBytes > MAX_ZIP_TOTAL_BYTES) {
      throw new Error("ZIP-Archiv ist zu groß (entpackt).");
    }
    fileCount += 1;
    if (fileCount > MAX_ZIP_FILES) {
      throw new Error("ZIP-Archiv enthält zu viele Dateien.");
    }
  };
}

/**
 * Extracts a ZIP archive into a list of `ParsedFile`s.
 *
 * Directory entries, files below `SKIP_DIRS` directories, empty files and
 * files larger than `MAX_ZIP_FILE_BYTES` are left out. Files that look binary
 * are kept as "resource" entries with empty content; all others are decoded
 * as text and classified by name.
 *
 * The size/count limits are enforced twice: first inside fflate's `filter`
 * callback, using the sizes declared in the archive, so that oversized
 * entries are rejected instead of being inflated into memory; then again on
 * the actual extracted bytes in case the declared sizes were inaccurate.
 *
 * NOTE(review): files over `MAX_ZIP_FILE_BYTES` are dropped silently and thus
 * never reach later analysis — confirm that this is intended.
 *
 * @param buffer Raw bytes of the uploaded ZIP archive.
 * @returns One entry per accepted file, in the order returned by fflate.
 * @throws Error when the accepted files exceed `MAX_ZIP_TOTAL_BYTES` in total
 *   or there are more than `MAX_ZIP_FILES` of them; fflate errors for
 *   malformed archives propagate unchanged.
 */
export function parseZip(buffer: Buffer): ParsedFile[] {
  const chargeDeclared = createZipBudget();
  const files = unzipSync(new Uint8Array(buffer), {
    filter: (entry) => {
      if (isSkippedZipPath(normalizeZipPath(entry.name))) return false;
      if (entry.originalSize > MAX_ZIP_FILE_BYTES) return false;
      // Entries declared as empty are let through for free; the loop below
      // decides on their real length.
      if (entry.originalSize > 0) chargeDeclared(entry.originalSize);
      return true;
    },
  });

  const chargeActual = createZipBudget();
  const result: ParsedFile[] = [];
  for (const [rawPath, bytes] of Object.entries(files)) {
    const path = normalizeZipPath(rawPath);
    if (bytes.length === 0) continue;
    if (bytes.length > MAX_ZIP_FILE_BYTES) continue;
    chargeActual(bytes.length);

    if (isProbablyBinary(bytes)) {
      // Binary payloads are listed but their content is not kept.
      result.push({
        path,
        kind: "resource",
        language: null,
        content: "",
        size: bytes.length,
      });
      continue;
    }
    result.push({
      path,
      kind: classify(path),
      language: LANG_BY_EXT[extOf(path)] ?? null,
      content: strFromU8(bytes),
      size: bytes.length,
    });
  }
  return result;
}

/**
 * Wraps a single uploaded file in a `ParsedFile`.
 *
 * Only the base name of `filename` is kept as the path (both "/" and "\"
 * are treated as separators); if that base name is empty, the unmodified
 * `filename` is used instead. Files that look binary become "resource"
 * entries with empty content. Text files are decoded as UTF-8 and a leading
 * byte-order mark is removed so that line-anchored patterns see the real
 * first character of the file.
 *
 * @param filename Name (or path) supplied with the upload.
 * @param buffer Raw file bytes.
 * @returns The parsed file; `size` is always the raw byte length.
 */
export function parseSingleFile(filename: string, buffer: Buffer): ParsedFile {
  // `pop()` on a split result is never undefined, so fall back on an empty
  // base name (e.g. "dir/") rather than on a nullish one.
  const path = filename.replace(/\\/g, "/").split("/").pop() || filename;

  // A Buffer is a Uint8Array, so it can be inspected without copying it.
  if (isProbablyBinary(buffer)) {
    return {
      path,
      kind: "resource",
      language: null,
      content: "",
      size: buffer.length,
    };
  }
  return {
    path,
    kind: classify(path),
    language: LANG_BY_EXT[extOf(path)] ?? null,
    content: buffer.toString("utf-8").replace(/^\uFEFF/, ""),
    size: buffer.length,
  };
}

/**
 * Wraps pasted text in a `ParsedFile` that is treated as a markdown
 * instruction file.
 *
 * @param text The pasted content.
 * @param name Desired file name; "SKILL.md" is used when it is empty.
 * @returns The parsed file; `size` is the UTF-8 byte length of `text`.
 */
export function parseText(text: string, name: string): ParsedFile {
  const path = name || "SKILL.md";
  const size = Buffer.byteLength(text, "utf-8");
  return { path, kind: "instruction", language: "markdown", content: text, size };
}

/**
 * Derives a display name (at most 120 characters) for a scan.
 *
 * Lookup order:
 * 1. the first markdown heading (`# Title`) in a file named `skill.md`
 *    (any letter case, any directory),
 * 2. the value of a `name:` line in that file, with one surrounding quote
 *    character stripped from each end,
 * 3. the first path segment of the first file,
 * 4. `fallback`.
 *
 * Heading and `name:` values must sit on the same line as their marker; an
 * empty `#` or `name:` line is ignored instead of picking up the next line,
 * and a value that is blank after trimming is skipped.
 *
 * @param files Parsed files of the scan (may be empty).
 * @param fallback Name to use when nothing can be derived from `files`.
 */
export function deriveScanName(files: ParsedFile[], fallback: string): string {
  const skillMd = files.find(
    (f) => (f.path.split("/").pop() ?? "").toLowerCase() === "skill.md",
  );
  if (skillMd) {
    // `[ \t]` instead of `\s`: `\s` also matches line breaks and would let
    // the capture group grab the following line.
    const heading = skillMd.content.match(/^#[ \t]+(.+)$/m)?.[1]?.trim();
    if (heading) return heading.slice(0, 120);

    const rawName = skillMd.content.match(/^name:[ \t]*(.+)$/im)?.[1];
    const name = rawName?.trim().replace(/^["']|["']$/g, "");
    if (name) return name.slice(0, 120);
  }
  const top = files[0]?.path.split("/")[0];
  return (top || fallback).slice(0, 120);
}
SkillGuard: complete frontend wiring and harden backend

Original task: build "SkillGuard", a German web app to audit agent skills on two axes (IT-Sicherheit, Datenschutz) with a static rule engine plus Replit-independent AI analysis configured via an admin backend.

This session:
- Fixed frontend TS errors: lucide-react name collisions (Badge from ui, Activity from lucide), widened apiType to AiProviderApiType, added queryKey to useGetScan.
- Verified all pages render in German (Dashboard, Prüfen, Bericht, Verlauf, Admin) and the full scan flow works end-to-end (malicious sample -> verdict block).

Code-review-driven hardening:
- POST /api/scans now returns the full ScanDetail (files + findings) to match the OpenAPI contract, instead of only the summary.
- AI provider error bodies are redacted (token, Bearer, sk- patterns) before being returned/persisted, and provider fetches now have a 60s timeout.
- ZIP parsing now enforces limits (max files, total + per-file size) to mitigate zip-bomb DoS.

Updated replit.md (project overview, decisions, gotchas) and added a memory note on lucide-react icon name collisions.

2026-06-08 14:59:17 +00:00

			`import { unzipSync, strFromU8 } from "fflate";`
			`import type { FileKind, ParsedFile } from "./ruleCatalog";`

			`const LANG_BY_EXT: Record<string, string> = {`
			`sh: "shell",`
			`bash: "shell",`
			`zsh: "shell",`
			`py: "python",`
			`js: "javascript",`
			`mjs: "javascript",`
			`cjs: "javascript",`
			`ts: "typescript",`
			`rb: "ruby",`
			`pl: "perl",`
			`php: "php",`
			`ps1: "powershell",`
			`go: "go",`
			`rs: "rust",`
			`md: "markdown",`
			`txt: "text",`
			`json: "json",`
			`yaml: "yaml",`
			`yml: "yaml",`
			`toml: "toml",`
			`env: "dotenv",`
			`};`

			`const SCRIPT_EXTS = new Set([`
			`"sh",`
			`"bash",`
			`"zsh",`
			`"py",`
			`"js",`
			`"mjs",`
			`"cjs",`
			`"ts",`
			`"rb",`
			`"pl",`
			`"php",`
			`"ps1",`
			`"go",`
			`"rs",`
			`]);`

			`const SKIP_DIRS = ["__macosx/", ".git/", "node_modules/"];`

			`const MAX_ZIP_FILES = 2000;`
			`const MAX_ZIP_TOTAL_BYTES = 60 * 1024 * 1024;`
			`const MAX_ZIP_FILE_BYTES = 5 * 1024 * 1024;`

			`function extOf(path: string): string {`
			`const base = path.split("/").pop() ?? path;`
			`const dot = base.lastIndexOf(".");`
			`return dot >= 0 ? base.slice(dot + 1).toLowerCase() : "";`
			`}`

			`function classify(path: string): FileKind {`
			`const base = (path.split("/").pop() ?? path).toLowerCase();`
			`const ext = extOf(path);`
			`if (base === "skill.md") return "instruction";`
			`if (SCRIPT_EXTS.has(ext)) return "script";`
			`if (ext === "md" \|\| ext === "txt") return "instruction";`
			`return "resource";`
			`}`

			`function isProbablyBinary(bytes: Uint8Array): boolean {`
			`const len = Math.min(bytes.length, 4000);`
			`let nontext = 0;`
			`for (let i = 0; i < len; i++) {`
			`const b = bytes[i];`
			`if (b === 0) return true;`
			`if (b < 9 \|\| (b > 13 && b < 32)) nontext++;`
			`}`
			`return len > 0 && nontext / len > 0.3;`
			`}`

			`export function parseZip(buffer: Buffer): ParsedFile[] {`
			`const files = unzipSync(new Uint8Array(buffer));`
			`const result: ParsedFile[] = [];`
			`let totalBytes = 0;`
			`let processed = 0;`
			`for (const [rawPath, bytes] of Object.entries(files)) {`
			`const path = rawPath.replace(/\\/g, "/");`
			`if (path.endsWith("/")) continue;`
			`const lower = path.toLowerCase();`
			`if (SKIP_DIRS.some((d) => lower.includes(d))) continue;`
			`if (bytes.length === 0) continue;`
			`if (bytes.length > MAX_ZIP_FILE_BYTES) continue;`
			`totalBytes += bytes.length;`
			`if (totalBytes > MAX_ZIP_TOTAL_BYTES) {`
			`throw new Error("ZIP-Archiv ist zu groß (entpackt).");`
			`}`
			`processed += 1;`
			`if (processed > MAX_ZIP_FILES) {`
			`throw new Error("ZIP-Archiv enthält zu viele Dateien.");`
			`}`
			`if (isProbablyBinary(bytes)) {`
			`result.push({`
			`path,`
			`kind: "resource",`
			`language: null,`
			`content: "",`
			`size: bytes.length,`
			`});`
			`continue;`
			`}`
			`result.push({`
			`path,`
			`kind: classify(path),`
			`language: LANG_BY_EXT[extOf(path)] ?? null,`
			`content: strFromU8(bytes),`
			`size: bytes.length,`
			`});`
			`}`
			`return result;`
			`}`

			`export function parseSingleFile(filename: string, buffer: Buffer): ParsedFile {`
			`const path = filename.replace(/\\/g, "/").split("/").pop() ?? filename;`
			`if (isProbablyBinary(new Uint8Array(buffer))) {`
			`return {`
			`path,`
			`kind: "resource",`
			`language: null,`
			`content: "",`
			`size: buffer.length,`
			`};`
			`}`
			`return {`
			`path,`
			`kind: classify(path),`
			`language: LANG_BY_EXT[extOf(path)] ?? null,`
			`content: buffer.toString("utf-8"),`
			`size: buffer.length,`
			`};`
			`}`

			`export function parseText(text: string, name: string): ParsedFile {`
			`return {`
			`path: name \|\| "SKILL.md",`
			`kind: "instruction",`
			`language: "markdown",`
			`content: text,`
			`size: Buffer.byteLength(text, "utf-8"),`
			`};`
			`}`

			`export function deriveScanName(files: ParsedFile[], fallback: string): string {`
			`const skillMd = files.find(`
			`(f) => (f.path.split("/").pop() ?? "").toLowerCase() === "skill.md",`
			`);`
			`if (skillMd) {`
			`const m = skillMd.content.match(/^#\s+(.+)$/m);`
			`if (m) return m[1].trim().slice(0, 120);`
			`const nameMatch = skillMd.content.match(/^name:\s*(.+)$/im);`
			`if (nameMatch) return nameMatch[1].trim().replace(/^["']\|["']$/g, "").slice(0, 120);`
			`}`
			`const top = files[0]?.path.split("/")[0];`
			`return (top \|\| fallback).slice(0, 120);`
			`}`