import { unzipSync, strFromU8 } from "fflate";
import type { FileKind, ParsedFile } from "./ruleCatalog";

/** Maps a lower-cased file extension (without the dot) to a language label. */
const LANG_BY_EXT: Record<string, string> = {
  sh: "shell",
  bash: "shell",
  zsh: "shell",
  py: "python",
  js: "javascript",
  mjs: "javascript",
  cjs: "javascript",
  ts: "typescript",
  rb: "ruby",
  pl: "perl",
  php: "php",
  ps1: "powershell",
  go: "go",
  rs: "rust",
  md: "markdown",
  txt: "text",
  json: "json",
  yaml: "yaml",
  yml: "yaml",
  toml: "toml",
  env: "dotenv",
};

/** Extensions whose files are classified as "script". */
const SCRIPT_EXTS = new Set([
  "sh",
  "bash",
  "zsh",
  "py",
  "js",
  "mjs",
  "cjs",
  "ts",
  "rb",
  "pl",
  "php",
  "ps1",
  "go",
  "rs",
]);

/** Lower-cased directory names (with trailing slash) whose contents are ignored in archives. */
const SKIP_DIRS = ["__macosx/", ".git/", "node_modules/"];

/** Maximum number of non-skipped files accepted from one archive. */
const MAX_ZIP_FILES = 2000;
/** Maximum summed size, in bytes, of all accepted files of one archive. */
const MAX_ZIP_TOTAL_BYTES = 60 * 1024 * 1024;
/** Archive entries larger than this many bytes are skipped. */
const MAX_ZIP_FILE_BYTES = 5 * 1024 * 1024;

const ZIP_TOO_LARGE_MESSAGE = "ZIP-Archiv ist zu groß (entpackt).";
const ZIP_TOO_MANY_FILES_MESSAGE = "ZIP-Archiv enthält zu viele Dateien.";

/** Converts backslash separators to forward slashes. */
function normalizeSeparators(rawPath: string): string {
  return rawPath.replace(/\\/g, "/");
}

/** Returns the last "/"-separated segment of `path`. */
function baseName(path: string): string {
  return path.split("/").pop() ?? path;
}

/**
 * Returns the lower-cased extension of the file name in `path` (text after the
 * last dot, without the dot), or "" when the file name contains no dot.
 */
function extOf(path: string): string {
  const base = baseName(path);
  const dot = base.lastIndexOf(".");
  return dot >= 0 ? base.slice(dot + 1).toLowerCase() : "";
}

/**
 * Classifies a file by name: `skill.md` (any casing) and `.md`/`.txt` files are
 * "instruction", files with an extension in SCRIPT_EXTS are "script", anything
 * else is "resource".
 */
function classify(path: string): FileKind {
  const base = baseName(path).toLowerCase();
  const ext = extOf(path);
  if (base === "skill.md") return "instruction";
  if (SCRIPT_EXTS.has(ext)) return "script";
  if (ext === "md" || ext === "txt") return "instruction";
  return "resource";
}

/**
 * Heuristic binary sniffing over the first 4000 bytes: any NUL byte marks the
 * data as binary, otherwise it is binary when more than 30% of the inspected
 * bytes are control characters other than 9-13 (tab through carriage return).
 */
function isProbablyBinary(bytes: Uint8Array): boolean {
  const len = Math.min(bytes.length, 4000);
  let nontext = 0;
  for (const b of bytes.subarray(0, len)) {
    if (b === 0) return true;
    if (b < 9 || (b > 13 && b < 32)) nontext++;
  }
  return len > 0 && nontext / len > 0.3;
}

/**
 * Returns true when `path` lies inside one of SKIP_DIRS. The directory name
 * must be a complete path segment, so "repo.git/run.sh" is NOT skipped while
 * ".git/config" and "pkg/.git/config" are.
 */
function isInSkippedDir(path: string): boolean {
  const lower = path.toLowerCase();
  return SKIP_DIRS.some(
    (dir) => lower.startsWith(dir) || lower.includes(`/${dir}`),
  );
}

/**
 * Builds the ParsedFile record for `bytes` stored under `path`. Data that looks
 * binary becomes a content-less "resource"; otherwise `decode` is invoked to
 * obtain the text and the kind/language are derived from the file name.
 */
function toParsedFile(
  path: string,
  bytes: Uint8Array,
  decode: () => string,
): ParsedFile {
  if (isProbablyBinary(bytes)) {
    return {
      path,
      kind: "resource",
      language: null,
      content: "",
      size: bytes.length,
    };
  }
  return {
    path,
    kind: classify(path),
    language: LANG_BY_EXT[extOf(path)] ?? null,
    content: decode(),
    size: bytes.length,
  };
}

/**
 * Extracts a ZIP archive into ParsedFile records.
 *
 * Directory entries, entries inside SKIP_DIRS, empty files and files larger
 * than MAX_ZIP_FILE_BYTES are skipped. Binary-looking files are returned as
 * "resource" entries with empty content.
 *
 * The size and count limits are applied twice: first on the sizes declared in
 * the archive, through fflate's `filter` option, so that oversized entries are
 * rejected before they are decompressed; then again on the extracted bytes.
 *
 * @param buffer Raw bytes of the ZIP archive.
 * @returns One record per accepted file, in archive order.
 * @throws Error when the accepted files exceed MAX_ZIP_TOTAL_BYTES in total or
 *   MAX_ZIP_FILES in number (messages are user-facing, in German), and whatever
 *   `unzipSync` throws for an archive it cannot read.
 */
export function parseZip(buffer: Buffer): ParsedFile[] {
  let declaredBytes = 0;
  let declaredFiles = 0;

  const files = unzipSync(new Uint8Array(buffer), {
    filter: (entry) => {
      const path = normalizeSeparators(entry.name);
      if (path.endsWith("/") || isInSkippedDir(path)) return false;

      // Stored entries are copied verbatim (compressed size); compressed ones
      // are expected to inflate to the declared original size.
      const expected =
        entry.compression === 0 ? entry.size : entry.originalSize;
      // Declared-empty entries cost nothing; the loop below drops them.
      if (expected === 0) return true;
      if (expected > MAX_ZIP_FILE_BYTES) return false;

      declaredBytes += expected;
      if (declaredBytes > MAX_ZIP_TOTAL_BYTES) {
        throw new Error(ZIP_TOO_LARGE_MESSAGE);
      }
      declaredFiles += 1;
      if (declaredFiles > MAX_ZIP_FILES) {
        throw new Error(ZIP_TOO_MANY_FILES_MESSAGE);
      }
      return true;
    },
  });

  const result: ParsedFile[] = [];
  let totalBytes = 0;
  let processed = 0;

  for (const [rawPath, bytes] of Object.entries(files)) {
    const path = normalizeSeparators(rawPath);
    if (path.endsWith("/") || isInSkippedDir(path)) continue;
    if (bytes.length === 0 || bytes.length > MAX_ZIP_FILE_BYTES) continue;

    // Re-check against the real extracted sizes; declared sizes come from the
    // (untrusted) archive itself.
    totalBytes += bytes.length;
    if (totalBytes > MAX_ZIP_TOTAL_BYTES) {
      throw new Error(ZIP_TOO_LARGE_MESSAGE);
    }
    processed += 1;
    if (processed > MAX_ZIP_FILES) {
      throw new Error(ZIP_TOO_MANY_FILES_MESSAGE);
    }

    result.push(toParsedFile(path, bytes, () => strFromU8(bytes)));
  }

  return result;
}

/**
 * Wraps a single uploaded file as a ParsedFile. Only the file name (last path
 * segment, after normalising backslashes) is kept as `path`; binary-looking
 * data yields a "resource" entry with empty content, text is decoded as UTF-8.
 *
 * @param filename Name or path supplied for the upload.
 * @param buffer Raw file bytes.
 */
export function parseSingleFile(filename: string, buffer: Buffer): ParsedFile {
  const path = baseName(normalizeSeparators(filename));
  // A Buffer is a Uint8Array, so it can be sniffed without copying it first.
  return toParsedFile(path, buffer, () => buffer.toString("utf-8"));
}

/**
 * Wraps pasted text as a markdown "instruction" file.
 *
 * @param text The text content.
 * @param name Path to record; an empty name falls back to "SKILL.md".
 */
export function parseText(text: string, name: string): ParsedFile {
  return {
    path: name || "SKILL.md",
    kind: "instruction",
    language: "markdown",
    content: text,
    size: Buffer.byteLength(text, "utf-8"),
  };
}

/**
 * Derives a display name (at most 120 characters) for a set of parsed files.
 *
 * Preference order: the first `# ` heading of a `skill.md` file, then the value
 * of a `name:` line in that file (surrounding quotes removed), then the first
 * path segment of the first file, then `fallback`. Blank headings or names are
 * ignored and the next candidate is tried.
 *
 * @param files Parsed files of the scan.
 * @param fallback Name used when nothing can be derived from `files`.
 */
export function deriveScanName(files: ParsedFile[], fallback: string): string {
  const skillMd = files.find(
    (f) => baseName(f.path).toLowerCase() === "skill.md",
  );

  if (skillMd) {
    // [ \t] instead of \s keeps the match on one line: a bare "#" or an empty
    // "name:" must not capture the following line.
    const heading = skillMd.content.match(/^#[ \t]+(.+)$/m)?.[1]?.trim();
    if (heading) return heading.slice(0, 120);

    const declaredName = skillMd.content
      .match(/^name:[ \t]*(.+)$/im)?.[1]
      ?.trim()
      .replace(/^["']|["']$/g, "");
    if (declaredName) return declaredName.slice(0, 120);
  }

  const top = files[0]?.path.split("/")[0];
  return (top || fallback).slice(0, 120);
}