import { Unzip, UnzipInflate, strFromU8 } from "fflate";
import type { FileKind, ParsedFile } from "./ruleCatalog";

// NOTE: LANG_BY_EXT, SKIP_DIRS, MAX_ZIP_FILES, MAX_ZIP_FILE_BYTES,
// MAX_ZIP_TOTAL_BYTES, classify, extOf and isProbablyBinary are referenced
// below and are defined elsewhere in this module.

/**
 * Joins decompressed chunks into one contiguous byte array.
 *
 * @param chunks - Chunks in the order they were produced.
 * @param total - Sum of all chunk lengths; used to size the output once.
 * @returns A new array holding the chunks back to back.
 */
function concatChunks(chunks: Uint8Array[], total: number): Uint8Array {
  const out = new Uint8Array(total);
  let offset = 0;
  for (const c of chunks) {
    out.set(c, offset);
    offset += c.length;
  }
  return out;
}

/**
 * Reports whether `data` ends with a ZIP end-of-central-directory record.
 *
 * The record is at least 22 bytes long and may be followed by a comment of up
 * to 65535 bytes, so only that trailing window is searched for the
 * `PK\x05\x06` signature. Empty input and non-ZIP data yield `false`.
 */
function hasEndOfCentralDirectory(data: Uint8Array): boolean {
  const recordMinBytes = 22;
  const maxCommentBytes = 0xffff;
  const lowest = Math.max(0, data.length - recordMinBytes - maxCommentBytes);
  for (let i = data.length - recordMinBytes; i >= lowest; i -= 1) {
    if (
      data[i] === 0x50 &&
      data[i + 1] === 0x4b &&
      data[i + 2] === 0x05 &&
      data[i + 3] === 0x06
    ) {
      return true;
    }
  }
  return false;
}

/**
 * Builds the result entry for one fully extracted, non-empty file.
 * Content that looks binary is recorded as a "resource" without text content;
 * everything else is classified by path and decoded to a string.
 */
function toParsedFile(path: string, bytes: Uint8Array): ParsedFile {
  if (isProbablyBinary(bytes)) {
    return {
      path,
      kind: "resource",
      language: null,
      content: "",
      size: bytes.length,
    };
  }
  return {
    path,
    kind: classify(path),
    language: LANG_BY_EXT[extOf(path)] ?? null,
    content: strFromU8(bytes),
    size: bytes.length,
  };
}

/**
 * Streaming ZIP extraction. Limits (file count, total uncompressed bytes,
 * per-file bytes) are enforced WHILE decompressing: input is pushed in 64 KiB
 * chunks and the caps are checked after every decompressed chunk, so a crafted
 * "zip bomb" is stopped at most one input chunk after a cap is exceeded instead
 * of being inflated completely first.
 *
 * The streaming reader only looks for local file headers and would silently
 * return an empty result for empty or non-ZIP input. To keep such uploads an
 * error, the buffer must contain an end-of-central-directory record; this is
 * checked before anything is inflated.
 *
 * @param buffer - Raw bytes of the uploaded archive.
 * @returns One entry per extracted file. Directories, paths matching
 *   SKIP_DIRS, empty files and files above the per-file cap are left out.
 * @throws Error if the data is not a readable ZIP archive, or if the file
 *   count or total uncompressed size exceeds its limit.
 */
export function parseZip(buffer: Buffer): ParsedFile[] {
  const unreadable = "ZIP-Archiv konnte nicht entpackt werden.";
  const data = new Uint8Array(buffer);
  if (!hasEndOfCentralDirectory(data)) {
    throw new Error(unreadable);
  }

  const result: ParsedFile[] = [];
  let totalBytes = 0;
  let fileCount = 0;
  // Callbacks cannot throw through fflate cleanly, so they record the first
  // failure here; the push loop stops and the error is raised at the end.
  let abortReason: string | null = null;

  const unzip = new Unzip();
  unzip.register(UnzipInflate);

  unzip.onfile = (file) => {
    if (abortReason) return;
    const path = file.name.replace(/\\/g, "/");
    if (path.endsWith("/")) return;
    const lower = path.toLowerCase();
    if (SKIP_DIRS.some((d) => lower.includes(d))) return;

    // Early skip using the declared uncompressed size (when present). Not
    // calling start() means the entry is never inflated.
    if (
      typeof file.originalSize === "number" &&
      file.originalSize > MAX_ZIP_FILE_BYTES
    ) {
      return;
    }

    fileCount += 1;
    if (fileCount > MAX_ZIP_FILES) {
      abortReason = "ZIP-Archiv enthält zu viele Dateien.";
      return;
    }

    const chunks: Uint8Array[] = [];
    let fileBytes = 0;
    let skipFile = false;

    file.ondata = (err, chunk, final) => {
      if (abortReason) return;
      if (err) {
        abortReason = unreadable;
        return;
      }
      if (chunk && chunk.length > 0) {
        fileBytes += chunk.length;
        totalBytes += chunk.length;
        if (totalBytes > MAX_ZIP_TOTAL_BYTES) {
          abortReason = "ZIP-Archiv ist zu groß (entpackt).";
          return;
        }
        if (fileBytes > MAX_ZIP_FILE_BYTES) {
          // Per-file cap hit (e.g. spoofed header size): drop buffered data,
          // keep counting toward the total cap as a backstop.
          skipFile = true;
          chunks.length = 0;
          return;
        }
        if (!skipFile) chunks.push(chunk);
      }
      if (final && !abortReason && !skipFile) {
        const bytes = concatChunks(chunks, fileBytes);
        chunks.length = 0;
        if (bytes.length === 0) return;
        result.push(toParsedFile(path, bytes));
      }
    };

    file.start();
  };

  const CHUNK = 64 * 1024;
  try {
    for (let i = 0; i < data.length; i += CHUNK) {
      if (abortReason) break;
      const end = Math.min(i + CHUNK, data.length);
      unzip.push(data.subarray(i, end), end >= data.length);
    }
  } catch {
    // fflate throws on malformed or truncated archives; report them with the
    // same user-facing message as decoder errors.
    throw new Error(unreadable);
  }

  if (abortReason) throw new Error(abortReason);
  return result;
}