import { Unzip, UnzipInflate, strFromU8 } from "fflate";
import type { FileKind, ParsedFile } from "./ruleCatalog";
import { hashBytes } from "./skillFingerprint";

/**
 * Lowercase file extension (without the dot) -> language label.
 *
 * A Map is used instead of a plain object so that extensions derived from
 * untrusted file names (e.g. "constructor", "__proto__") can never resolve to
 * an inherited Object.prototype member.
 */
const LANG_BY_EXT = new Map<string, string>([
  ["sh", "shell"],
  ["bash", "shell"],
  ["zsh", "shell"],
  ["py", "python"],
  ["js", "javascript"],
  ["mjs", "javascript"],
  ["cjs", "javascript"],
  ["ts", "typescript"],
  ["rb", "ruby"],
  ["pl", "perl"],
  ["php", "php"],
  ["ps1", "powershell"],
  ["go", "go"],
  ["rs", "rust"],
  ["md", "markdown"],
  ["txt", "text"],
  ["json", "json"],
  ["yaml", "yaml"],
  ["yml", "yaml"],
  ["toml", "toml"],
  ["env", "dotenv"],
]);

/** Extensions whose files are classified as "script". */
const SCRIPT_EXTS = new Set([
  "sh",
  "bash",
  "zsh",
  "py",
  "js",
  "mjs",
  "cjs",
  "ts",
  "rb",
  "pl",
  "php",
  "ps1",
  "go",
  "rs",
]);

/** Lowercase directory names whose contents are ignored inside archives. */
const SKIP_DIRS = new Set(["__macosx", ".git", "node_modules"]);

const MAX_ZIP_FILES = 2000;
const MAX_ZIP_TOTAL_BYTES = 60 * 1024 * 1024;
const MAX_ZIP_FILE_BYTES = 5 * 1024 * 1024;

/**
 * Detects the local-file-header signature ("PK\x03\x04") that every real ZIP
 * archive begins with. `.skill` files exported by the Skill tooling are ZIP
 * containers, so we sniff the bytes rather than trusting the file extension.
 */
function looksLikeZip(buffer: Buffer): boolean {
  return (
    buffer.length >= 4 &&
    buffer[0] === 0x50 &&
    buffer[1] === 0x4b &&
    buffer[2] === 0x03 &&
    buffer[3] === 0x04
  );
}

/**
 * Returns the lowercase extension of the last path segment (text after the
 * final "."), or "" when the segment contains no dot. A dotfile such as
 * ".env" therefore yields "env".
 */
function extOf(path: string): string {
  const base = path.split("/").pop() ?? path;
  const dot = base.lastIndexOf(".");
  return dot >= 0 ? base.slice(dot + 1).toLowerCase() : "";
}

/** Looks up the language label for a path's extension; null when unknown. */
function languageOf(path: string): string | null {
  return LANG_BY_EXT.get(extOf(path)) ?? null;
}

/**
 * Assigns a FileKind from the file name alone: "skill.md" and .md/.txt files
 * are instructions, known script extensions are scripts, everything else is a
 * resource.
 */
function classify(path: string): FileKind {
  const base = (path.split("/").pop() ?? path).toLowerCase();
  const ext = extOf(path);
  if (base === "skill.md") return "instruction";
  if (SCRIPT_EXTS.has(ext)) return "script";
  if (ext === "md" || ext === "txt") return "instruction";
  return "resource";
}

/**
 * Heuristic binary sniffing over the first 4000 bytes: any NUL byte means
 * binary; otherwise the content is binary when more than 30% of the sampled
 * bytes are control characters other than TAB/LF/VT/FF/CR.
 */
function isProbablyBinary(bytes: Uint8Array): boolean {
  const len = Math.min(bytes.length, 4000);
  let nontext = 0;
  for (let i = 0; i < len; i++) {
    const b = bytes[i];
    if (b === 0) return true;
    if (b < 9 || (b > 13 && b < 32)) nontext++;
  }
  return len > 0 && nontext / len > 0.3;
}

/** Joins `chunks` into one array of exactly `total` bytes. */
function concatChunks(chunks: Uint8Array[], total: number): Uint8Array {
  const out = new Uint8Array(total);
  let offset = 0;
  for (const c of chunks) {
    out.set(c, offset);
    offset += c.length;
  }
  return out;
}

/**
 * True when any directory segment of `path` (case-insensitive) is one of
 * SKIP_DIRS. Whole segments are compared, so "repo.git/x" or
 * "my_node_modules/x" are NOT skipped, while ".git/x" and "a/node_modules/x"
 * are.
 */
function isInSkippedDir(path: string): boolean {
  const segments = path.toLowerCase().split("/");
  segments.pop(); // the last segment is the file name, not a directory
  return segments.some((segment) => SKIP_DIRS.has(segment));
}

/**
 * Builds the ParsedFile record for one file's bytes. Binary content is
 * reported as a "resource" with empty `content`; text content is classified
 * by name and decoded with the caller-supplied `decode`.
 */
function toParsedFile(
  path: string,
  bytes: Uint8Array,
  decode: (bytes: Uint8Array) => string,
): ParsedFile {
  const hash = hashBytes(bytes);
  if (isProbablyBinary(bytes)) {
    return {
      path,
      kind: "resource",
      language: null,
      content: "",
      size: bytes.length,
      hash,
      isBinary: true,
    };
  }
  return {
    path,
    kind: classify(path),
    language: languageOf(path),
    content: decode(bytes),
    size: bytes.length,
    hash,
    isBinary: false,
  };
}

/**
 * Streaming ZIP extraction. Limits (file count, total uncompressed bytes,
 * per-file bytes) are enforced WHILE decompressing — input is pushed in small
 * chunks and decompression is aborted as soon as a cap is exceeded, so a
 * crafted "zip bomb" cannot be fully inflated into memory before checks apply.
 *
 * Directory entries, entries inside SKIP_DIRS, entries over the per-file cap
 * and zero-byte entries are omitted from the result.
 *
 * @throws Error when the archive exceeds the file-count or total-size cap, or
 *   cannot be decompressed.
 */
export function parseZip(buffer: Buffer): ParsedFile[] {
  // Plain Uint8Array view over the same memory: avoids copying the upload.
  const data = new Uint8Array(
    buffer.buffer,
    buffer.byteOffset,
    buffer.byteLength,
  );
  const result: ParsedFile[] = [];
  let totalBytes = 0;
  let fileCount = 0;
  let abortReason: string | null = null;

  const unzip = new Unzip();
  unzip.register(UnzipInflate);

  unzip.onfile = (file) => {
    if (abortReason) return;
    const path = file.name.replace(/\\/g, "/");
    if (path.endsWith("/")) return;
    if (isInSkippedDir(path)) return;

    // Early skip using the declared uncompressed size (when present). Not
    // calling start() causes fflate to skip the file's data without inflating.
    if (
      typeof file.originalSize === "number" &&
      file.originalSize > MAX_ZIP_FILE_BYTES
    ) {
      return;
    }

    fileCount += 1;
    if (fileCount > MAX_ZIP_FILES) {
      abortReason = "ZIP-Archiv enthält zu viele Dateien.";
      return;
    }

    const chunks: Uint8Array[] = [];
    let fileBytes = 0;
    let skipFile = false;

    file.ondata = (err, chunk, final) => {
      if (abortReason) return;
      if (err) {
        abortReason = "ZIP-Archiv konnte nicht entpackt werden.";
        return;
      }
      if (chunk && chunk.length > 0) {
        fileBytes += chunk.length;
        totalBytes += chunk.length;
        if (totalBytes > MAX_ZIP_TOTAL_BYTES) {
          abortReason = "ZIP-Archiv ist zu groß (entpackt).";
          return;
        }
        if (fileBytes > MAX_ZIP_FILE_BYTES) {
          // Per-file cap hit (e.g. spoofed header size): drop buffered data,
          // keep counting toward the total cap as a backstop.
          skipFile = true;
          chunks.length = 0;
          return;
        }
        if (!skipFile) chunks.push(chunk);
      }
      if (final && !abortReason && !skipFile) {
        const bytes = concatChunks(chunks, fileBytes);
        chunks.length = 0;
        if (bytes.length === 0) return;
        result.push(toParsedFile(path, bytes, (b) => strFromU8(b)));
      }
    };

    file.start();
  };

  const CHUNK = 64 * 1024;
  try {
    for (let i = 0; i < data.length; i += CHUNK) {
      if (abortReason) break;
      const end = Math.min(i + CHUNK, data.length);
      unzip.push(data.subarray(i, end), end >= data.length);
    }
  } catch {
    throw new Error("ZIP-Archiv konnte nicht entpackt werden.");
  }

  if (abortReason) throw new Error(abortReason);
  return result;
}

/**
 * Entry point for uploaded files (single-file *and* ZIP-area uploads). A
 * `.skill` file is really a ZIP container, so we treat any upload that either
 * carries the ZIP signature or uses a `.zip`/`.skill` extension as an archive
 * and extract it via the streaming ZIP path. If the buffer is not a real
 * archive (e.g. someone named a plain text file `.skill`) we fall back cleanly
 * to single-file handling instead of failing. Real archives still surface their
 * limit/corruption errors so the existing protections stay in force.
 *
 * @throws Error from parseZip when the buffer carries the ZIP signature and
 *   extraction fails.
 */
export function parseUpload(filename: string, buffer: Buffer): ParsedFile[] {
  const isZipSignature = looksLikeZip(buffer);
  const hasArchiveExt = /\.(zip|skill)$/i.test(filename);

  if (isZipSignature || hasArchiveExt) {
    try {
      const files = parseZip(buffer);
      if (files.length > 0) return files;
      // A valid-but-empty archive falls through to single-file handling.
    } catch (err) {
      // A buffer with a real ZIP signature is genuinely an archive, so limit
      // and corruption errors must surface. An extension-only guess that is
      // not actually a ZIP falls back to single-file handling.
      if (isZipSignature) throw err;
    }
  }

  return [parseSingleFile(filename, buffer)];
}

/**
 * Wraps one uploaded buffer as a ParsedFile. Only the last path segment of
 * `filename` (after normalising backslashes) is kept as the path; text content
 * is decoded as UTF-8.
 */
export function parseSingleFile(filename: string, buffer: Buffer): ParsedFile {
  const path = filename.replace(/\\/g, "/").split("/").pop() ?? filename;
  // Buffer is a Uint8Array, so it is inspected and hashed in place (no copy).
  return toParsedFile(path, buffer, () => buffer.toString("utf-8"));
}

/** Wraps pasted text as a synthetic "SKILL.md" instruction file. */
export function parseText(text: string): ParsedFile {
  return {
    path: "SKILL.md",
    kind: "instruction",
    language: "markdown",
    content: text,
    size: Buffer.byteLength(text, "utf-8"),
    hash: hashBytes(Buffer.from(text, "utf-8")),
    isBinary: false,
  };
}

/**
 * Picks a display name for a scan (at most 120 characters). Sources, in
 * order: the first `# heading` in a skill.md file, then its first `name:`
 * line (surrounding quotes stripped), then the top path segment of the first
 * file, then `fallback`.
 *
 * Whitespace after `#` / `name:` is matched on the same line only, and a
 * candidate that is empty after trimming is ignored, so an empty heading or
 * `name:` value never yields an empty or wrong-line name.
 */
export function deriveScanName(files: ParsedFile[], fallback: string): string {
  const skillMd = files.find(
    (f) => (f.path.split("/").pop() ?? "").toLowerCase() === "skill.md",
  );

  if (skillMd) {
    // [^\S\r\n] = whitespace that is not a line break.
    const heading = skillMd.content.match(/^#[^\S\r\n]+(.+)$/m)?.[1]?.trim();
    if (heading) return heading.slice(0, 120);

    const name = skillMd.content
      .match(/^name:[^\S\r\n]*(.+)$/im)?.[1]
      ?.trim()
      .replace(/^["']|["']$/g, "");
    if (name) return name.slice(0, 120);
  }

  const top = files[0]?.path.split("/")[0];
  return (top || fallback).slice(0, 120);
}