import { Router, type IRouter } from "express";
import { db } from "@workspace/db";
import {
  scansTable,
  scanFilesTable,
  findingsTable,
  type Scan,
  type ScanFile,
  type Finding,
  type ScanRelation,
  aiProvidersTable,
  promptsTable,
  type Prompt,
} from "@workspace/db";
import { eq, desc, count } from "drizzle-orm";
import rateLimit from "express-rate-limit";
import { zipSync, strToU8 } from "fflate";
import {
  ListScansResponse,
  CreateScanBody,
  GetScanParams,
  GetScanResponse,
  DeleteScanParams,
  CompareScansParams,
  CompareScansResponse,
  GetScanLineageResponse,
  ModerateScanParams,
  ModerateScanBody,
  ModerateScanResponse,
} from "@workspace/api-zod";
import { resolveAuth, requireAdmin } from "../middlewares/auth";
import { parseUpload, parseText, deriveScanName } from "../lib/skillParser";
import { analyzeSkill, type EngineResult } from "../lib/scanEngine";
import { STATIC_RULES, AI_RULES, type ParsedFile } from "../lib/ruleCatalog";
import { generateSkillDescription } from "../lib/aiAnalysis";
import { computeFingerprint } from "../lib/skillFingerprint";
import { lineDiff, lineSimilarity } from "../lib/lineDiff";
import { logger } from "../lib/logger";

const router: IRouter = Router();

/**
 * Validated request body of the scan-creation endpoints, i.e. the value the
 * `CreateScanBody` schema yields after parsing `req.body`.
 *
 * NOTE(review): the type argument was missing in the reviewed source
 * (`ReturnType;`); it is reconstructed here from how the type is used —
 * confirm it matches the intended schema output type.
 */
type CreateScanInput = ReturnType<typeof CreateScanBody.parse>;

/**
 * Converts a stored scan row into its JSON-safe summary representation.
 *
 * Every field is copied through unchanged except `createdAt`, which is
 * rendered as an ISO-8601 string so the result can be serialized directly.
 *
 * @param scan - The scan row as loaded from `scansTable`.
 * @returns The scan summary used by list, detail and moderation responses.
 */
export function serializeScan(scan: Scan) {
  return {
    id: scan.id,
    name: scan.name,
    description: scan.description,
    source: scan.source,
    status: scan.status,
    verdict: scan.verdict,
    riskScore: scan.riskScore,
    fileCount: scan.fileCount,
    aiUsed: scan.aiUsed,
    aiError: scan.aiError,
    findingCounts: scan.findingCounts,
    fingerprint: scan.fingerprint,
    relation: scan.relation,
    similarity: scan.similarity,
    comparedScanId: scan.comparedScanId,
    hidden: scan.hidden,
    createdAt: scan.createdAt.toISOString(),
  };
}

// Public scan creation is rate-limited per client to curb abuse of the open
// upload/test endpoints. Admin and read endpoints are unaffected.
const scanRateLimiter = rateLimit({
  windowMs: 60 * 1000,
  limit: 10,
  standardHeaders: true,
  legacyHeaders: false,
  message: {
    message: "Zu viele Scans in kurzer Zeit. Bitte später erneut versuchen.",
  },
});

/**
 * Converts a stored scan file into its API representation.
 * `hasContent` is false when no content was stored for the file (binary
 * files are persisted with `content = null`).
 */
function serializeFile(f: ScanFile) {
  return {
    path: f.path,
    kind: f.kind,
    language: f.language,
    size: f.size,
    hash: f.hash,
    hasContent: f.content !== null,
    content: f.content,
  };
}

/** Short summary of the prior scan a report was compared against. */
type ComparedScan = {
  id: number;
  name: string;
  verdict: string;
  riskScore: number;
  createdAt: string;
};

/**
 * Loads the summary of the scan referenced by `comparedScanId`.
 *
 * @param id - Id of the compared scan, or `null` when there is none.
 * @returns The summary, or `null` when `id` is nullish or no such scan exists
 *   (for example because it has been deleted since).
 */
async function resolveComparedScan(
  id: number | null,
): Promise<ComparedScan | null> {
  if (id == null) return null;
  const [s] = await db.select().from(scansTable).where(eq(scansTable.id, id));
  if (!s) return null;
  return {
    id: s.id,
    name: s.name,
    verdict: s.verdict,
    riskScore: s.riskScore,
    createdAt: s.createdAt.toISOString(),
  };
}

/**
 * Counts how many stored scans carry the given fingerprint.
 *
 * @param fingerprint - Fingerprint to look up.
 * @returns The number of matching scans; `1` when the fingerprint is empty or
 *   the count query yields no row.
 */
export async function countFingerprint(fingerprint: string): Promise<number> {
  if (!fingerprint) return 1;
  const [row] = await db
    .select({ c: count() })
    .from(scansTable)
    .where(eq(scansTable.fingerprint, fingerprint));
  return Number(row?.c ?? 1);
}

/** Converts a stored finding into its API representation. */
function serializeFinding(f: Finding) {
  return {
    id: f.id,
    ruleId: f.ruleId,
    axis: f.axis,
    severity: f.severity,
    title: f.title,
    description: f.description,
    remediation: f.remediation,
    file: f.file,
    line: f.line,
    snippet: f.snippet,
    detectedBy: f.detectedBy,
  };
}

/**
 * Assembles the full scan report payload from already-loaded parts.
 * Findings are returned ordered by id; the input array is not mutated.
 */
function serializeScanDetail(
  scan: Scan,
  files: ScanFile[],
  findings: Finding[],
  checkCount: number,
  comparedScan: ComparedScan | null,
) {
  return {
    ...serializeScan(scan),
    checkpoints: scan.checkpoints ?? [],
    files: files.map(serializeFile),
    findings: [...findings].sort((a, b) => a.id - b.id).map(serializeFinding),
    checkCount,
    comparedScan,
  };
}

/**
 * Builds the scan report payload, loading the two derived values
 * (fingerprint count and compared-scan summary) in parallel.
 */
async function buildScanDetail(
  scan: Scan,
  files: ScanFile[],
  findings: Finding[],
) {
  const [checkCount, comparedScan] = await Promise.all([
    countFingerprint(scan.fingerprint),
    resolveComparedScan(scan.comparedScanId),
  ]);
  return serializeScanDetail(scan, files, findings, checkCount, comparedScan);
}

/** How a new upload relates to the scans that are already stored. */
type RelationInfo = {
  relation: ScanRelation;
  similarity: number | null;
  comparedScanId: number | null;
};

/** Hash and (optional) text content of one file of a previously stored scan. */
type PriorFile = { hash: string | null; content: string | null };

/**
 * Minimum content similarity (0-100) for a non-identical upload to count as a
 * modified version of a prior scan rather than a brand-new skill. Keeps
 * unrelated single-file pastes (which always share the "SKILL.md" path) from
 * being falsely linked together.
 */
const MODIFIED_SIMILARITY_THRESHOLD = 40;

/**
 * Determine how the freshly parsed skill relates to the scans already stored.
 * Exact fingerprint match -> identical; otherwise the most content-similar prior
 * skill (when it overlaps enough or shares a byte-identical file) -> modified;
 * nothing meaningfully in common -> new.
 *
 * @param fingerprint - Fingerprint of the new upload; an empty value skips the
 *   exact-match lookup.
 * @param files - Parsed files of the new upload.
 * @returns The relation, the similarity (100 for identical, `null` for new)
 *   and the id of the prior scan it was matched against.
 */
export async function computeRelation(
  fingerprint: string,
  files: ParsedFile[],
): Promise<RelationInfo> {
  if (fingerprint) {
    // The most recent scan with the same fingerprint wins.
    const [identical] = await db
      .select({ id: scansTable.id })
      .from(scansTable)
      .where(eq(scansTable.fingerprint, fingerprint))
      .orderBy(desc(scansTable.createdAt))
      .limit(1);
    if (identical) {
      return { relation: "identical", similarity: 100, comparedScanId: identical.id };
    }
  }

  // Load the files of every prior scan and group them by scan so each scan can
  // be compared as a whole. NOTE(review): this reads the stored content of all
  // files of all scans on every upload; fine for small catalogs, worth
  // narrowing if the table grows.
  const priorFiles = await db
    .select({
      scanId: scanFilesTable.scanId,
      path: scanFilesTable.path,
      hash: scanFilesTable.hash,
      content: scanFilesTable.content,
    })
    .from(scanFilesTable);

  const byScan = new Map<number, Map<string, PriorFile>>();
  for (const row of priorFiles) {
    if (!row.path) continue;
    let priorByPath = byScan.get(row.scanId);
    if (!priorByPath) {
      priorByPath = new Map<string, PriorFile>();
      byScan.set(row.scanId, priorByPath);
    }
    priorByPath.set(row.path, { hash: row.hash, content: row.content });
  }

  const newPaths = new Set(files.map((f) => f.path));
  const newHashes = new Set(
    files.map((f) => f.hash).filter((h): h is string => Boolean(h)),
  );

  // Score every prior scan by content-aware similarity (not just path overlap).
  // Path overlap alone is misleading: single-file text skills always share the
  // path "SKILL.md", so unrelated pastes would otherwise look related. We pick
  // the most similar prior scan and only call it a modified version when the
  // content actually overlaps enough OR at least one file is byte-identical.
  let bestId: number | null = null;
  let bestSimilarity = -1;
  let bestHasHashOverlap = false;

  for (const [scanId, priorByPath] of byScan) {
    let sharesPath = false;
    let hashOverlap = false;
    for (const [path, prior] of priorByPath) {
      if (newPaths.has(path)) sharesPath = true;
      if (prior.hash && newHashes.has(prior.hash)) hashOverlap = true;
    }

    // Nothing in common at all -> cannot be a version of this skill.
    if (!sharesPath && !hashOverlap) continue;

    const similarity = computeContentSimilarity(files, priorByPath);
    // Higher similarity wins; on a tie prefer the candidate that shares a
    // byte-identical file.
    const isBetter =
      similarity > bestSimilarity ||
      (similarity === bestSimilarity && hashOverlap && !bestHasHashOverlap);
    if (isBetter) {
      bestSimilarity = similarity;
      bestId = scanId;
      bestHasHashOverlap = hashOverlap;
    }
  }

  // Treat as a modified version only with a meaningful content overlap or a
  // shared byte-identical file; otherwise it is a genuinely new skill that just
  // happens to reuse a common file path.
  if (
    bestId !== null &&
    (bestHasHashOverlap || bestSimilarity >= MODIFIED_SIMILARITY_THRESHOLD)
  ) {
    return {
      relation: "modified",
      similarity: bestSimilarity,
      comparedScanId: bestId,
    };
  }

  return { relation: "new", similarity: null, comparedScanId: null };
}

/**
 * Content-aware similarity (0-100) between the new files and a matched prior
 * scan. Identical files (same hash) count fully; changed text files use the
 * line-level similarity; added/removed or changed binary files count as 0.
 *
 * @param newFiles - Parsed files of the new upload.
 * @param prior - Files of the prior scan, keyed by path.
 * @returns The rounded average per-path score over the union of both path
 *   sets, or 0 when there are no paths at all.
 */
export function computeContentSimilarity(
  newFiles: ParsedFile[],
  prior: Map<string, PriorFile>,
): number {
  const newByPath = new Map(newFiles.map((f) => [f.path, f]));
  const paths = new Set([...newByPath.keys(), ...prior.keys()]);
  if (paths.size === 0) return 0;

  let total = 0;
  for (const path of paths) {
    const cur = newByPath.get(path);
    const prev = prior.get(path);
    if (!cur || !prev) continue; // added or removed -> 0
    if (cur.hash && cur.hash === prev.hash) {
      total += 1;
      continue;
    }
    if (!cur.isBinary && prev.content !== null) {
      // NOTE(review): assumes lineSimilarity returns a fraction in [0, 1],
      // matching the weight of 1 used for identical files — confirm.
      total += lineSimilarity(prev.content, cur.content);
    }
    // changed binary -> 0
  }
  return Math.round((total / paths.size) * 100);
}

/** Outcome of turning a request body into parsed skill files. */
type ParseResult =
  | { ok: true; files: ParsedFile[] }
  | { ok: false; status: number; message: string };

/**
 * Parses the uploaded ZIP, single file or pasted text into skill files.
 *
 * Never throws: missing input and parser failures are reported as a
 * `{ ok: false }` result carrying the HTTP status and user-facing message.
 *
 * @param input - The validated scan-creation request body.
 */
function parseScanInput(input: CreateScanInput): ParseResult {
  try {
    let files: ParsedFile[];
    if (input.source === "zip" || input.source === "file") {
      const isZip = input.source === "zip";
      if (!input.contentBase64) {
        return {
          ok: false,
          status: 400,
          message: isZip ? "ZIP-Inhalt fehlt." : "Dateiinhalt fehlt.",
        };
      }
      files = parseUpload(
        input.filename ?? (isZip ? "archiv.zip" : "datei"),
        Buffer.from(input.contentBase64, "base64"),
      );
    } else {
      if (!input.text || !input.text.trim()) {
        return { ok: false, status: 400, message: "Text fehlt." };
      }
      files = [parseText(input.text)];
    }

    if (files.length === 0) {
      return {
        ok: false,
        status: 400,
        message: "Keine analysierbaren Dateien gefunden.",
      };
    }
    return { ok: true, files };
  } catch (err) {
    logger.error({ err }, "Skill-Parsing fehlgeschlagen");
    return {
      ok: false,
      status: 400,
      message:
        "Das Skill konnte nicht gelesen werden. Bitte prüfen Sie das Format (gültiges ZIP / Textdatei).",
    };
  }
}

/**
 * Stores a finished analysis: the scan row, its files and its findings.
 *
 * Binary files are stored without content (`content = null`).
 *
 * @param input - The validated request body (only `source` is persisted).
 * @param name - Display name of the scan.
 * @param files - Parsed files that were analyzed.
 * @param result - Outcome of the analysis engine.
 * @returns The inserted scan row together with the inserted file and finding rows.
 * @throws Error when the scan insert does not return a row.
 */
async function persistScan(
  input: CreateScanInput,
  name: string,
  files: ParsedFile[],
  result: EngineResult,
): Promise<{ scan: Scan; files: ScanFile[]; findings: Finding[] }> {
  const fingerprint = computeFingerprint(
    files.map((f) => ({ path: f.path, hash: f.hash })),
  );

  // Determine relation against the existing database BEFORE inserting the new
  // scan so the comparison excludes this scan itself. The skill is always
  // re-scanned; identical uploads are stored as duplicates.
  const relationInfo = await computeRelation(fingerprint, files);

  const [scan] = await db
    .insert(scansTable)
    .values({
      name,
      description: result.aiDescription,
      source: input.source,
      status: "completed",
      verdict: result.verdict,
      riskScore: result.riskScore,
      fileCount: files.length,
      aiUsed: result.aiUsed,
      aiError: result.aiError,
      findingCounts: result.counts,
      checkpoints: result.checkpoints,
      fingerprint,
      relation: relationInfo.relation,
      similarity: relationInfo.similarity,
      comparedScanId: relationInfo.comparedScanId,
    })
    .returning();
  if (!scan) {
    // Fail with a clear error instead of a TypeError on `scan.id` below.
    throw new Error("Inserting the scan returned no row");
  }

  let insertedFiles: ScanFile[] = [];
  if (files.length > 0) {
    insertedFiles = await db
      .insert(scanFilesTable)
      .values(
        files.map((f) => ({
          scanId: scan.id,
          path: f.path,
          kind: f.kind,
          language: f.language,
          size: f.size,
          hash: f.hash,
          content: f.isBinary ? null : f.content,
        })),
      )
      .returning();
  }

  let insertedFindings: Finding[] = [];
  if (result.findings.length > 0) {
    insertedFindings = await db
      .insert(findingsTable)
      .values(
        result.findings.map((f) => ({
          scanId: scan.id,
          ruleId: f.ruleId,
          axis: f.axis,
          severity: f.severity,
          title: f.title,
          description: f.description,
          remediation: f.remediation,
          file: f.file,
          line: f.line,
          snippet: f.snippet,
          detectedBy: f.detectedBy,
        })),
      )
      .returning();
  }

  return { scan, files: insertedFiles, findings: insertedFindings };
}

// GET /scans — list all scans, newest first.
router.get("/scans", async (req, res) => {
  // Public visitors only see the released catalog; admins also see hidden scans
  // so they can manage moderation.
  const info = await resolveAuth(req);
  const rows = await db
    .select()
    .from(scansTable)
    .where(info.isAdmin ? undefined : eq(scansTable.hidden, false))
    .orderBy(desc(scansTable.createdAt));
  res.json(ListScansResponse.parse(rows.map(serializeScan)));
});

// POST /scans — analyze an upload synchronously and return the full report.
router.post("/scans", scanRateLimiter, async (req, res) => {
  const parsed = CreateScanBody.safeParse(req.body);
  if (!parsed.success) {
    return res
      .status(400)
      .json({ message: "Ungültige Eingabe", details: parsed.error.issues });
  }
  const input = parsed.data;

  const parseResult = parseScanInput(input);
  if (!parseResult.ok) {
    return res.status(parseResult.status).json({ message: parseResult.message });
  }
  const files = parseResult.files;

  const name = input.name?.trim() || deriveScanName(files, "Unbenanntes Skill");
  const result = await analyzeSkill(files, input.useAi);
  const { scan, files: insertedFiles, findings } = await persistScan(
    input,
    name,
    files,
    result,
  );

  return res
    .status(201)
    .json(GetScanResponse.parse(await buildScanDetail(scan, insertedFiles, findings)));
});

/** Pause after each streamed checkpoint so clients can render progress. */
const STREAM_PACING_MS = 80;

/** Resolves after `ms` milliseconds. */
const delay = (ms: number) =>
  new Promise<void>((resolve) => setTimeout(resolve, ms));

// POST /scans/stream — same analysis as POST /scans, but progress is streamed
// as newline-delimited JSON events: "start", "ai-start", "checkpoint", then
// either "done" or "error".
router.post("/scans/stream", scanRateLimiter, async (req, res) => {
  const parsed = CreateScanBody.safeParse(req.body);
  if (!parsed.success) {
    res
      .status(400)
      .json({ message: "Ungültige Eingabe", details: parsed.error.issues });
    return;
  }
  const input = parsed.data;

  const parseResult = parseScanInput(input);
  if (!parseResult.ok) {
    res.status(parseResult.status).json({ message: parseResult.message });
    return;
  }
  const files = parseResult.files;
  const name = input.name?.trim() || deriveScanName(files, "Unbenanntes Skill");

  // From here on the response is a stream; errors can no longer change the
  // status code and are reported as an "error" event instead.
  res.status(200);
  res.setHeader("Content-Type", "application/x-ndjson; charset=utf-8");
  res.setHeader("Cache-Control", "no-cache, no-transform");
  res.setHeader("X-Accel-Buffering", "no");
  res.setHeader("Connection", "keep-alive");
  res.flushHeaders();

  // Detect a genuine client disconnect. NOTE: do NOT use req.on("close") here —
  // for a POST it fires as soon as the request body is consumed, not on abort.
  // res "close" before writableFinished means the client went away.
  let aborted = false;
  res.on("close", () => {
    if (!res.writableFinished) aborted = true;
  });

  // Writes one NDJSON event; silently drops it once the client is gone.
  const write = (obj: unknown) => {
    if (aborted || res.writableEnded) return;
    res.write(JSON.stringify(obj) + "\n");
  };

  write({
    type: "start",
    name,
    fileCount: files.length,
    totalChecks: STATIC_RULES.length + (input.useAi ? AI_RULES.length : 0),
  });

  let cumulative = 0;
  try {
    const result = await analyzeSkill(files, input.useAi, async (event) => {
      if (event.type === "ai-start") {
        write({ type: "ai-start" });
        return;
      }
      cumulative += event.checkpoint.scoreDelta;
      write({
        type: "checkpoint",
        checkpoint: event.checkpoint,
        runningScore: Math.min(100, cumulative),
      });
      // No point pacing the stream when nobody is listening any more.
      if (!aborted) await delay(STREAM_PACING_MS);
    });

    // The scan is persisted even when the client disconnected mid-stream.
    const { scan } = await persistScan(input, name, files, result);

    write({
      type: "done",
      scanId: scan.id,
      riskScore: result.riskScore,
      verdict: result.verdict,
      findingCounts: result.counts,
      aiUsed: result.aiUsed,
      aiError: result.aiError,
    });
    if (!aborted && !res.writableEnded) res.end();
  } catch (err) {
    logger.error({ err }, "Streaming-Scan fehlgeschlagen");
    write({ type: "error", message: "Die Analyse ist fehlgeschlagen." });
    if (!aborted && !res.writableEnded) res.end();
  }
});

// GET /scans/:id — full report of a single scan.
router.get("/scans/:id", async (req, res) => {
  const params = GetScanParams.safeParse(req.params);
  if (!params.success) return res.status(400).json({ message: "Ungültige ID" });

  const [scan] = await db
    .select()
    .from(scansTable)
    .where(eq(scansTable.id, params.data.id));
  if (!scan) return res.status(404).json({ message: "Scan nicht gefunden" });

  // Hidden scans are invisible to the public; only admins can open the report.
  if (scan.hidden) {
    const info = await resolveAuth(req);
    if (!info.isAdmin) return res.status(404).json({ message: "Scan nicht gefunden" });
  }

  // The two lookups are independent, so run them in parallel.
  const [files, findings] = await Promise.all([
    db.select().from(scanFilesTable).where(eq(scanFilesTable.scanId, scan.id)),
    db
      .select()
      .from(findingsTable)
      .where(eq(findingsTable.scanId, scan.id))
      .orderBy(findingsTable.id),
  ]);

  return res.json(GetScanResponse.parse(await buildScanDetail(scan, files, findings)));
});

// Public download of a skill that PASSED. Bundles the stored text files back
// into a ZIP. Binary files were never persisted, so they are omitted. Blocked
// for non-pass verdicts and for hidden scans (unless the caller is an admin).
/**
 * Derives a download-safe file name (without extension) from a scan name.
 * Runs of characters outside `[a-zA-Z0-9._-]` become a single "-", leading and
 * trailing dashes are trimmed and the result is capped at 80 characters.
 *
 * @returns The cleaned name, or "skill" when nothing usable remains.
 */
function safeFilename(name: string): string {
  const cleaned = name
    .replace(/[^a-zA-Z0-9._-]+/g, "-")
    .replace(/^-+|-+$/g, "")
    .slice(0, 80);
  return cleaned || "skill";
}

/**
 * Decides whether the caller may see all of the given scans.
 * Visible scans are public; as soon as one of them is hidden the caller must
 * be an admin. Authentication is only resolved when it is actually needed.
 */
async function canViewScans(
  req: Parameters<typeof resolveAuth>[0],
  ...scans: Pick<Scan, "hidden">[]
): Promise<boolean> {
  if (!scans.some((s) => s.hidden)) return true;
  const info = await resolveAuth(req);
  return info.isAdmin;
}

// GET /scans/:id/download — ZIP of the stored text files of a passed scan.
router.get("/scans/:id/download", async (req, res) => {
  const params = GetScanParams.safeParse(req.params);
  if (!params.success) return res.status(400).json({ message: "Ungültige ID" });

  const [scan] = await db
    .select()
    .from(scansTable)
    .where(eq(scansTable.id, params.data.id));
  if (!scan) return res.status(404).json({ message: "Scan nicht gefunden" });

  // Hidden scans answer with 404 so their existence is not revealed.
  if (!(await canViewScans(req, scan))) {
    return res.status(404).json({ message: "Scan nicht gefunden" });
  }

  if (scan.verdict !== "pass") {
    return res.status(403).json({
      message:
        "Nur Skills mit dem Ergebnis „Bestanden“ können heruntergeladen werden.",
    });
  }

  const files = await db
    .select()
    .from(scanFilesTable)
    .where(eq(scanFilesTable.scanId, scan.id));

  const entries: Record<string, Uint8Array> = {};
  for (const f of files) {
    if (f.content === null) continue; // binary content was not stored
    entries[f.path] = strToU8(f.content);
  }

  if (Object.keys(entries).length === 0) {
    return res.status(404).json({
      message: "Für dieses Skill sind keine herunterladbaren Dateien gespeichert.",
    });
  }

  const zipped = zipSync(entries, { level: 6 });
  const filename = `${safeFilename(scan.name)}.zip`;
  res.setHeader("Content-Type", "application/zip");
  res.setHeader(
    "Content-Disposition",
    `attachment; filename="${filename}"`,
  );
  return res.send(Buffer.from(zipped));
});

// PATCH /scans/:id — admin moderation: hide or release a scan.
router.patch("/scans/:id", requireAdmin, async (req, res) => {
  const params = ModerateScanParams.safeParse(req.params);
  if (!params.success) return res.status(400).json({ message: "Ungültige ID" });

  const parsed = ModerateScanBody.safeParse(req.body);
  if (!parsed.success) {
    return res
      .status(400)
      .json({ message: "Ungültige Eingabe", details: parsed.error.issues });
  }

  const [updated] = await db
    .update(scansTable)
    .set({ hidden: parsed.data.hidden })
    .where(eq(scansTable.id, params.data.id))
    .returning();
  if (!updated) return res.status(404).json({ message: "Scan nicht gefunden" });

  return res.json(ModerateScanResponse.parse(serializeScan(updated)));
});

// GET /scans/:id/compare/:otherId — file-by-file comparison of two scans,
// with a line diff for every modified file whose text is stored on both sides.
router.get("/scans/:id/compare/:otherId", async (req, res) => {
  const params = CompareScansParams.safeParse(req.params);
  if (!params.success) return res.status(400).json({ message: "Ungültige ID" });
  const { id, otherId } = params.data;

  const [[current], [previous]] = await Promise.all([
    db.select().from(scansTable).where(eq(scansTable.id, id)),
    db.select().from(scansTable).where(eq(scansTable.id, otherId)),
  ]);
  if (!current || !previous) {
    return res.status(404).json({ message: "Scan nicht gefunden" });
  }

  // The diff exposes file contents, so it must follow the same visibility rule
  // as the report itself: hidden scans do not exist for non-admins.
  if (!(await canViewScans(req, current, previous))) {
    return res.status(404).json({ message: "Scan nicht gefunden" });
  }

  const [currentFiles, previousFiles] = await Promise.all([
    db.select().from(scanFilesTable).where(eq(scanFilesTable.scanId, id)),
    db.select().from(scanFilesTable).where(eq(scanFilesTable.scanId, otherId)),
  ]);

  const currentByPath = new Map(currentFiles.map((f) => [f.path, f]));
  const previousByPath = new Map(previousFiles.map((f) => [f.path, f]));
  // Union of both path sets in plain code-unit order (locale independent).
  const paths = Array.from(
    new Set([...currentByPath.keys(), ...previousByPath.keys()]),
  ).sort((a, b) => (a < b ? -1 : a > b ? 1 : 0));

  const fileDiffs = paths.map((path) => {
    const cur = currentByPath.get(path) ?? null;
    const prev = previousByPath.get(path) ?? null;

    let status: "unchanged" | "modified" | "added" | "removed";
    if (cur && !prev) status = "added";
    else if (!cur && prev) status = "removed";
    else if (cur && prev && cur.hash === prev.hash) status = "unchanged";
    else status = "modified";

    // A line diff only exists when both versions have stored text content.
    let diff: ReturnType<typeof lineDiff> | null = null;
    if (status === "modified" && cur?.content != null && prev?.content != null) {
      diff = lineDiff(prev.content, cur.content);
    }

    return {
      path,
      status,
      previousHash: prev?.hash ?? null,
      currentHash: cur?.hash ?? null,
      previousSize: prev?.size ?? null,
      currentSize: cur?.size ?? null,
      previousHasContent: prev ? prev.content !== null : null,
      currentHasContent: cur ? cur.content !== null : null,
      lineDiff: diff,
    };
  });

  const side = (s: Scan) => ({
    id: s.id,
    name: s.name,
    verdict: s.verdict,
    riskScore: s.riskScore,
    fileCount: s.fileCount,
    fingerprint: s.fingerprint,
    createdAt: s.createdAt.toISOString(),
  });

  return res.json(
    CompareScansResponse.parse({
      current: side(current),
      previous: side(previous),
      files: fileDiffs,
    }),
  );
});

// GET /scans/:id/lineage — every scan in the same version family, newest first.
router.get("/scans/:id/lineage", async (req, res) => {
  const params = GetScanParams.safeParse(req.params);
  if (!params.success) return res.status(400).json({ message: "Ungültige ID" });

  const [scan] = await db
    .select()
    .from(scansTable)
    .where(eq(scansTable.id, params.data.id));
  if (!scan) return res.status(404).json({ message: "Scan nicht gefunden" });

  // Same visibility rule as the report: a hidden scan does not exist for the
  // public, and hidden relatives are left out of a public lineage below.
  const { isAdmin } = await resolveAuth(req);
  if (scan.hidden && !isAdmin) {
    return res.status(404).json({ message: "Scan nicht gefunden" });
  }

  // Load only the columns needed to reconstruct the lineage graph for every
  // stored scan, then walk the connected component containing this scan.
  const all = await db
    .select({
      id: scansTable.id,
      name: scansTable.name,
      verdict: scansTable.verdict,
      riskScore: scansTable.riskScore,
      relation: scansTable.relation,
      similarity: scansTable.similarity,
      comparedScanId: scansTable.comparedScanId,
      fingerprint: scansTable.fingerprint,
      hidden: scansTable.hidden,
      createdAt: scansTable.createdAt,
    })
    .from(scansTable);

  const byId = new Map(all.map((s) => [s.id, s]));

  // Build an undirected graph: scans are linked when one was compared against
  // the other (comparedScanId chain) or when they share an identical
  // fingerprint. The fingerprint family is the connected component.
  const adjacency = new Map<number, Set<number>>();
  const link = (from: number, to: number) => {
    const neighbours = adjacency.get(from) ?? new Set<number>();
    neighbours.add(to);
    adjacency.set(from, neighbours);
  };
  const addEdge = (a: number, b: number) => {
    // Ignore self-links and references to scans that no longer exist.
    if (!byId.has(a) || !byId.has(b) || a === b) return;
    link(a, b);
    link(b, a);
  };

  const byFingerprint = new Map<string, number[]>();
  for (const s of all) {
    if (s.comparedScanId != null) addEdge(s.id, s.comparedScanId);
    if (s.fingerprint) {
      const list = byFingerprint.get(s.fingerprint) ?? [];
      list.push(s.id);
      byFingerprint.set(s.fingerprint, list);
    }
  }
  // Linking every duplicate to the first one is enough to connect the group.
  for (const [first, ...rest] of byFingerprint.values()) {
    if (first === undefined) continue;
    for (const other of rest) addEdge(first, other);
  }

  // Breadth-first walk; `head` is a read cursor so no element is ever shifted.
  const family = new Set<number>([scan.id]);
  const queue: number[] = [scan.id];
  for (let head = 0; head < queue.length; head++) {
    const cur = queue[head];
    if (cur === undefined) continue;
    for (const next of adjacency.get(cur) ?? []) {
      if (!family.has(next)) {
        family.add(next);
        queue.push(next);
      }
    }
  }

  // Hidden scans still connect the graph above, but are only listed for admins.
  const entries = Array.from(family)
    .flatMap((fid) => {
      const member = byId.get(fid);
      return member && (isAdmin || !member.hidden) ? [member] : [];
    })
    .sort((a, b) => b.createdAt.getTime() - a.createdAt.getTime())
    .map((s) => ({
      id: s.id,
      name: s.name,
      verdict: s.verdict,
      riskScore: s.riskScore,
      relation: s.relation,
      similarity: s.similarity,
      comparedScanId: s.comparedScanId,
      fingerprint: s.fingerprint,
      createdAt: s.createdAt.toISOString(),
    }));

  return res.json(GetScanLineageResponse.parse(entries));
});

// DELETE /scans/:id — admin only. Answers 204 whether or not the scan existed.
router.delete("/scans/:id", requireAdmin, async (req, res) => {
  const params = DeleteScanParams.safeParse(req.params);
  if (!params.success) return res.status(400).json({ message: "Ungültige ID" });
  await db.delete(scansTable).where(eq(scansTable.id, params.data.id));
  return res.status(204).send();
});

// Generate the AI description for an existing scan that has none yet (older
// scans were created before description generation existed). Reuses the same
// generateSkillDescription() helper and the configured provider. A failure must
// never alter the stored scan.
// POST /scans/:id/description — generate and store the AI description of an
// existing scan, then return the refreshed report. Errors on this route are
// reported under the `error` key (not `message`).
router.post("/scans/:id/description", async (req, res) => {
  const params = GetScanParams.safeParse(req.params);
  if (!params.success) return res.status(400).json({ error: "Ungültige ID" });

  const [scan] = await db
    .select()
    .from(scansTable)
    .where(eq(scansTable.id, params.data.id));
  if (!scan) return res.status(404).json({ error: "Scan nicht gefunden" });

  // The response contains the full report including file contents, so hidden
  // scans must stay invisible to non-admins here as well.
  if (scan.hidden) {
    const info = await resolveAuth(req);
    if (!info.isAdmin) return res.status(404).json({ error: "Scan nicht gefunden" });
  }

  const storedFiles = await db
    .select()
    .from(scanFilesTable)
    .where(eq(scanFilesTable.scanId, scan.id));

  // Use the first enabled provider; it must have an API token configured.
  const [provider] = await db
    .select()
    .from(aiProvidersTable)
    .where(eq(aiProvidersTable.enabled, true))
    .limit(1);
  if (!provider) {
    return res.status(422).json({
      error:
        "Kein aktiver KI-Provider konfiguriert. Bitte im Admin-Bereich einrichten.",
    });
  }
  if (!provider.apiToken) {
    return res.status(422).json({
      error: `Für den Provider "${provider.name}" ist kein API-Token hinterlegt.`,
    });
  }

  const prompts: Prompt[] = await db.select().from(promptsTable);

  // Reconstruct ParsedFile inputs from the stored scan files. Binary files have
  // no stored content; generateSkillDescription skips empty content anyway.
  const files: ParsedFile[] = storedFiles.map((f) => ({
    path: f.path,
    kind: f.kind as ParsedFile["kind"],
    language: f.language,
    content: f.content ?? "",
    size: f.size,
    hash: f.hash,
    isBinary: f.content === null,
  }));

  const description = await generateSkillDescription(provider, prompts, files);
  if (!description) {
    // Nothing has been written yet, so the stored scan stays untouched.
    return res.status(422).json({
      error:
        "Die Beschreibung konnte nicht erzeugt werden. Bitte Provider-Konfiguration und KI-Prompts prüfen.",
    });
  }

  const [updated] = await db
    .update(scansTable)
    .set({ description })
    .where(eq(scansTable.id, scan.id))
    .returning();
  // The scan may have been deleted while the description was being generated.
  if (!updated) return res.status(404).json({ error: "Scan nicht gefunden" });

  const findings = await db
    .select()
    .from(findingsTable)
    .where(eq(findingsTable.scanId, scan.id))
    .orderBy(findingsTable.id);

  return res.json(
    GetScanResponse.parse(await buildScanDetail(updated, storedFiles, findings)),
  );
});

export default router;