diff --git a/.agents/memory/MEMORY.md b/.agents/memory/MEMORY.md index 5968210..fba198e 100644 --- a/.agents/memory/MEMORY.md +++ b/.agents/memory/MEMORY.md @@ -1,3 +1,4 @@ - [lucide-react icon name collisions](lucide-icon-name-collisions.md) — `Badge`/`Activity` from lucide collide with shadcn/ui Badge and React 19 Activity; import Badge from ui, Activity from lucide. - [OpenAI gpt-5 temperature](openai-temperature-gpt5.md) — gpt-5* reject `temperature != 1`; omit temperature in OpenAI-compatible clients or AI analysis silently fails. - [NDJSON streaming on Replit](ndjson-streaming-express-replit.md) — use `res.on("close")`+`writableFinished` (NOT `req.on("close")`); persist on disconnect; proxy doesn't buffer; gate fallback to avoid dup rows. +- [Skill fingerprint & relation matching](skill-fingerprint-matching.md) — don't put display name in fingerprint path; pick the `modified` candidate by content-aware similarity, not hash-Jaccard (misses single-file edits) or path-Jaccard (links unrelated `SKILL.md` pastes); require similarity ≥ 40 or a byte-identical file. diff --git a/.agents/memory/skill-fingerprint-matching.md b/.agents/memory/skill-fingerprint-matching.md new file mode 100644 index 0000000..956cec5 --- /dev/null +++ b/.agents/memory/skill-fingerprint-matching.md @@ -0,0 +1,37 @@ +--- +name: Skill fingerprint & relation matching +description: How SkillGuard decides new/identical/modified between scans, and three traps that break it. +--- + +# Skill fingerprint & relation matching + +The overall fingerprint is a SHA-256 over sorted `path\u0000fileHash` pairs. Relation +detection: exact fingerprint match → `identical`; else the most content-similar prior scan +(similarity ≥ 40 or a shared byte-identical file) → `modified`; else `new`. + +## Trap 1 — display name must not leak into the fingerprint +Text-pasted skills are parsed into a single file. That file's `path` must be a **stable +constant** (`SKILL.md`), NOT the user-supplied scan name. 
If the name is used as the path, +two byte-identical pastes with different names get different fingerprints and are +mis-classified as `modified` (similarity 100) instead of `identical`. +**Why:** the fingerprint is meant to identify content/structure, not the display label. + +## Trap 2 — Jaccard over file *hashes* can't detect single-file modifications +For a single-file skill, any content edit changes the one hash completely, so Jaccard over +the hash set is 0 → wrongly classified `new`, and the compare/diff view (the whole point of +the feature) never links the two versions. +**Fix / how to apply:** match candidate scans by Jaccard over file **paths** (tie-break by +hash overlap; this candidate selection was later replaced, see Trap 3), then report `similarity` as a content-aware score: identical files (same hash) +count 1.0, changed text files use line-level LCS ratio (`lineSimilarity` = 2·LCS/(a+b), with a and b the two line counts), +added/removed or changed-binary files count 0. This yields a meaningful % for single-file +edits (e.g. one added line ≈ 90%) and still reduces to hash-equality for unchanged files. + +## Trap 3 — path overlap alone falsely links unrelated single-file skills +Because every text paste uses the constant path `SKILL.md` (Trap 1), path-Jaccard is always 1 +between any two text skills — so selecting/classifying `modified` by path overlap links totally +unrelated pastes (similarity could be ~0). **Fix / how to apply:** select the candidate by the +content-aware similarity score itself (not path overlap), and only return `modified` when +`bestSimilarity >= MODIFIED_SIMILARITY_THRESHOLD` (40) OR at least one file is byte-identical +(hash overlap). Otherwise return `new`. Skip scoring candidates with no shared path AND no +shared hash (similarity would be 0). **Why:** classification must reflect actual content +overlap, not a coincidentally-shared file path. 
diff --git a/artifacts/api-server/src/lib/lineDiff.ts b/artifacts/api-server/src/lib/lineDiff.ts new file mode 100644 index 0000000..77576d8 --- /dev/null +++ b/artifacts/api-server/src/lib/lineDiff.ts @@ -0,0 +1,115 @@ +export type DiffLineType = "context" | "add" | "remove"; + +export type DiffLine = { + type: DiffLineType; + text: string; + previousLine: number | null; + currentLine: number | null; +}; + +const MAX_DIFF_LINES = 2000; + +/** + * Line-based similarity ratio in [0, 1] using the longest common subsequence + * of lines: 2 * LCS / (linesA + linesB). 1 means identical, 0 means nothing in + * common. Used to give a meaningful "modified" similarity for changed files. + */ +export function lineSimilarity(previous: string, current: string): number { + const a = previous.split(/\r?\n/); + const b = current.split(/\r?\n/); + const n = a.length; + const m = b.length; + if (n === 0 && m === 0) return 1; + + let prev = new Array(m + 1).fill(0); + let curr = new Array(m + 1).fill(0); + for (let i = n - 1; i >= 0; i--) { + for (let j = m - 1; j >= 0; j--) { + curr[j] = + a[i] === b[j] ? prev[j + 1] + 1 : Math.max(prev[j], curr[j + 1]); + } + [prev, curr] = [curr, prev]; + } + const lcs = prev[0]; + return (2 * lcs) / (n + m); +} + +/** + * Line-based diff using a longest-common-subsequence backtrace. Returns null + * when either side is too large to diff reasonably (caller should fall back to + * a plain "modified" indication). + */ +export function lineDiff(previous: string, current: string): DiffLine[] | null { + const a = previous.split(/\r?\n/); + const b = current.split(/\r?\n/); + + if (a.length > MAX_DIFF_LINES || b.length > MAX_DIFF_LINES) return null; + + const n = a.length; + const m = b.length; + + // dp[i][j] = LCS length of a[i:] and b[j:] + const dp: number[][] = Array.from({ length: n + 1 }, () => + new Array(m + 1).fill(0), + ); + for (let i = n - 1; i >= 0; i--) { + for (let j = m - 1; j >= 0; j--) { + dp[i][j] = + a[i] === b[j] + ? 
dp[i + 1][j + 1] + 1 + : Math.max(dp[i + 1][j], dp[i][j + 1]); + } + } + + const result: DiffLine[] = []; + let i = 0; + let j = 0; + while (i < n && j < m) { + if (a[i] === b[j]) { + result.push({ + type: "context", + text: a[i], + previousLine: i + 1, + currentLine: j + 1, + }); + i++; + j++; + } else if (dp[i + 1][j] >= dp[i][j + 1]) { + result.push({ + type: "remove", + text: a[i], + previousLine: i + 1, + currentLine: null, + }); + i++; + } else { + result.push({ + type: "add", + text: b[j], + previousLine: null, + currentLine: j + 1, + }); + j++; + } + } + while (i < n) { + result.push({ + type: "remove", + text: a[i], + previousLine: i + 1, + currentLine: null, + }); + i++; + } + while (j < m) { + result.push({ + type: "add", + text: b[j], + previousLine: null, + currentLine: j + 1, + }); + j++; + } + + return result; +} diff --git a/artifacts/api-server/src/lib/ruleCatalog.ts b/artifacts/api-server/src/lib/ruleCatalog.ts index 7f9ebc5..bec67b6 100644 --- a/artifacts/api-server/src/lib/ruleCatalog.ts +++ b/artifacts/api-server/src/lib/ruleCatalog.ts @@ -8,6 +8,8 @@ export type ParsedFile = { language: string | null; content: string; size: number; + hash: string; + isBinary: boolean; }; export type RawFinding = { diff --git a/artifacts/api-server/src/lib/skillFingerprint.ts b/artifacts/api-server/src/lib/skillFingerprint.ts new file mode 100644 index 0000000..969b574 --- /dev/null +++ b/artifacts/api-server/src/lib/skillFingerprint.ts @@ -0,0 +1,30 @@ +import { createHash } from "node:crypto"; + +export function hashBytes(bytes: Uint8Array | Buffer): string { + return createHash("sha256").update(bytes).digest("hex"); +} + +export function hashText(text: string): string { + return createHash("sha256").update(Buffer.from(text, "utf-8")).digest("hex"); +} + +/** + * Deterministic overall fingerprint over the sorted pairs of (path, fileHash). + * Any change to a file's content (its hash) or its path changes the result. 
+ */ +export function computeFingerprint( + files: { path: string; hash: string }[], +): string { + const pairs = files + .map((f) => `${f.path}\u0000${f.hash}`) + .sort((a, b) => (a < b ? -1 : a > b ? 1 : 0)); + return createHash("sha256").update(pairs.join("\n")).digest("hex"); +} + +export function jaccard(a: Set, b: Set): number { + if (a.size === 0 && b.size === 0) return 0; + let inter = 0; + for (const x of a) if (b.has(x)) inter++; + const union = a.size + b.size - inter; + return union === 0 ? 0 : inter / union; +} diff --git a/artifacts/api-server/src/lib/skillParser.ts b/artifacts/api-server/src/lib/skillParser.ts index f30e2fc..548c239 100644 --- a/artifacts/api-server/src/lib/skillParser.ts +++ b/artifacts/api-server/src/lib/skillParser.ts @@ -1,5 +1,6 @@ import { Unzip, UnzipInflate, strFromU8 } from "fflate"; import type { FileKind, ParsedFile } from "./ruleCatalog"; +import { hashBytes } from "./skillFingerprint"; const LANG_BY_EXT: Record = { sh: "shell", @@ -152,6 +153,7 @@ export function parseZip(buffer: Buffer): ParsedFile[] { const bytes = concatChunks(chunks, fileBytes); chunks.length = 0; if (bytes.length === 0) return; + const hash = hashBytes(bytes); if (isProbablyBinary(bytes)) { result.push({ path, @@ -159,6 +161,8 @@ export function parseZip(buffer: Buffer): ParsedFile[] { language: null, content: "", size: bytes.length, + hash, + isBinary: true, }); } else { result.push({ @@ -167,6 +171,8 @@ export function parseZip(buffer: Buffer): ParsedFile[] { language: LANG_BY_EXT[extOf(path)] ?? null, content: strFromU8(bytes), size: bytes.length, + hash, + isBinary: false, }); } } @@ -192,6 +198,7 @@ export function parseZip(buffer: Buffer): ParsedFile[] { export function parseSingleFile(filename: string, buffer: Buffer): ParsedFile { const path = filename.replace(/\\/g, "/").split("/").pop() ?? 
filename; + const hash = hashBytes(buffer); if (isProbablyBinary(new Uint8Array(buffer))) { return { path, @@ -199,6 +206,8 @@ export function parseSingleFile(filename: string, buffer: Buffer): ParsedFile { language: null, content: "", size: buffer.length, + hash, + isBinary: true, }; } return { @@ -207,16 +216,20 @@ export function parseSingleFile(filename: string, buffer: Buffer): ParsedFile { language: LANG_BY_EXT[extOf(path)] ?? null, content: buffer.toString("utf-8"), size: buffer.length, + hash, + isBinary: false, }; } -export function parseText(text: string, name: string): ParsedFile { +export function parseText(text: string): ParsedFile { return { - path: name || "SKILL.md", + path: "SKILL.md", kind: "instruction", language: "markdown", content: text, size: Buffer.byteLength(text, "utf-8"), + hash: hashBytes(Buffer.from(text, "utf-8")), + isBinary: false, }; } diff --git a/artifacts/api-server/src/routes/scans.ts b/artifacts/api-server/src/routes/scans.ts index 798a47d..30ff7f9 100644 --- a/artifacts/api-server/src/routes/scans.ts +++ b/artifacts/api-server/src/routes/scans.ts @@ -7,14 +7,17 @@ import { type Scan, type ScanFile, type Finding, + type ScanRelation, } from "@workspace/db"; -import { eq, desc } from "drizzle-orm"; +import { eq, desc, count } from "drizzle-orm"; import { ListScansResponse, CreateScanBody, GetScanParams, GetScanResponse, DeleteScanParams, + CompareScansParams, + CompareScansResponse, } from "@workspace/api-zod"; import { parseZip, @@ -24,6 +27,8 @@ import { } from "../lib/skillParser"; import { analyzeSkill, type EngineResult } from "../lib/scanEngine"; import { STATIC_RULES, AI_RULES, type ParsedFile } from "../lib/ruleCatalog"; +import { computeFingerprint } from "../lib/skillFingerprint"; +import { lineDiff, lineSimilarity } from "../lib/lineDiff"; import { logger } from "../lib/logger"; const router: IRouter = Router(); @@ -42,6 +47,10 @@ export function serializeScan(scan: Scan) { aiUsed: scan.aiUsed, aiError: scan.aiError, 
findingCounts: scan.findingCounts, + fingerprint: scan.fingerprint, + relation: scan.relation, + similarity: scan.similarity, + comparedScanId: scan.comparedScanId, createdAt: scan.createdAt.toISOString(), }; } @@ -52,9 +61,43 @@ function serializeFile(f: ScanFile) { kind: f.kind, language: f.language, size: f.size, + hash: f.hash, + hasContent: f.content !== null, }; } +type ComparedScan = { + id: number; + name: string; + verdict: string; + riskScore: number; + createdAt: string; +}; + +async function resolveComparedScan( + id: number | null, +): Promise { + if (id == null) return null; + const [s] = await db.select().from(scansTable).where(eq(scansTable.id, id)); + if (!s) return null; + return { + id: s.id, + name: s.name, + verdict: s.verdict, + riskScore: s.riskScore, + createdAt: s.createdAt.toISOString(), + }; +} + +async function countFingerprint(fingerprint: string): Promise { + if (!fingerprint) return 1; + const [row] = await db + .select({ c: count() }) + .from(scansTable) + .where(eq(scansTable.fingerprint, fingerprint)); + return Number(row?.c ?? 1); +} + function serializeFinding(f: Finding) { return { id: f.id, @@ -75,15 +118,173 @@ function serializeScanDetail( scan: Scan, files: ScanFile[], findings: Finding[], + checkCount: number, + comparedScan: ComparedScan | null, ) { return { ...serializeScan(scan), checkpoints: scan.checkpoints ?? 
[], files: files.map(serializeFile), findings: [...findings].sort((a, b) => a.id - b.id).map(serializeFinding), + checkCount, + comparedScan, }; } +async function buildScanDetail( + scan: Scan, + files: ScanFile[], + findings: Finding[], +) { + const [checkCount, comparedScan] = await Promise.all([ + countFingerprint(scan.fingerprint), + resolveComparedScan(scan.comparedScanId), + ]); + return serializeScanDetail(scan, files, findings, checkCount, comparedScan); +} + +type RelationInfo = { + relation: ScanRelation; + similarity: number | null; + comparedScanId: number | null; +}; + +/** + * Determine how the freshly parsed skill relates to the scans already stored. + * Exact fingerprint match -> identical; otherwise the most content-similar prior + * skill (when it overlaps enough or shares a byte-identical file) -> modified; + * nothing meaningfully in common -> new. + */ +async function computeRelation( + fingerprint: string, + files: ParsedFile[], +): Promise { + if (fingerprint) { + const identical = await db + .select({ id: scansTable.id }) + .from(scansTable) + .where(eq(scansTable.fingerprint, fingerprint)) + .orderBy(desc(scansTable.createdAt)) + .limit(1); + if (identical.length > 0) { + return { relation: "identical", similarity: 100, comparedScanId: identical[0].id }; + } + } + + // Group every prior scan's files by scan id (path -> hash + content) so each + // candidate scan can be scored against the new upload. Candidates are ranked + // by content-aware similarity; a shared path or hash only pre-filters scans + // that have something in common, and hash overlap breaks similarity ties. 
+ const priorFiles = await db + .select({ + scanId: scanFilesTable.scanId, + path: scanFilesTable.path, + hash: scanFilesTable.hash, + content: scanFilesTable.content, + }) + .from(scanFilesTable); + + const byScan = new Map>(); + for (const row of priorFiles) { + if (!row.path) continue; + let map = byScan.get(row.scanId); + if (!map) { + map = new Map(); + byScan.set(row.scanId, map); + } + map.set(row.path, { hash: row.hash, content: row.content }); + } + + const newPaths = new Set(files.map((f) => f.path)); + const newHashes = new Set( + files.map((f) => f.hash).filter((h): h is string => Boolean(h)), + ); + + // Score every prior scan by content-aware similarity (not just path overlap). + // Path overlap alone is misleading: single-file text skills always share the + // path "SKILL.md", so unrelated pastes would otherwise look related. We pick + // the most similar prior scan and only call it a modified version when the + // content actually overlaps enough OR at least one file is byte-identical. + let bestId: number | null = null; + let bestSimilarity = -1; + let bestHasHashOverlap = false; + for (const [scanId, map] of byScan) { + const priorHashes = new Set( + Array.from(map.values()) + .map((v) => v.hash) + .filter(Boolean), + ); + const sharesPath = Array.from(map.keys()).some((p) => newPaths.has(p)); + const hashOverlap = Array.from(priorHashes).some((h) => newHashes.has(h)); + // Nothing in common at all -> cannot be a version of this skill. + if (!sharesPath && !hashOverlap) continue; + + const similarity = computeContentSimilarity(files, map); + if ( + similarity > bestSimilarity || + (similarity === bestSimilarity && hashOverlap && !bestHasHashOverlap) + ) { + bestSimilarity = similarity; + bestId = scanId; + bestHasHashOverlap = hashOverlap; + } + } + + // Treat as a modified version only with a meaningful content overlap or a + // shared byte-identical file; otherwise it is a genuinely new skill that just + // happens to reuse a common file path. 
+ if ( + bestId !== null && + (bestHasHashOverlap || bestSimilarity >= MODIFIED_SIMILARITY_THRESHOLD) + ) { + return { + relation: "modified", + similarity: bestSimilarity, + comparedScanId: bestId, + }; + } + + return { relation: "new", similarity: null, comparedScanId: null }; +} + +/** + * Minimum content similarity (0-100) for a non-identical upload to count as a + * modified version of a prior scan rather than a brand-new skill. Keeps + * unrelated single-file pastes (which always share the "SKILL.md" path) from + * being falsely linked together. + */ +const MODIFIED_SIMILARITY_THRESHOLD = 40; + +/** + * Content-aware similarity (0-100) between the new files and a matched prior + * scan. Identical files (same hash) count fully; changed text files use the + * line-level similarity; added/removed or changed binary files count as 0. + */ +function computeContentSimilarity( + newFiles: ParsedFile[], + prior: Map, +): number { + const newByPath = new Map(newFiles.map((f) => [f.path, f])); + const paths = new Set([...newByPath.keys(), ...prior.keys()]); + if (paths.size === 0) return 0; + + let total = 0; + for (const path of paths) { + const cur = newByPath.get(path); + const prev = prior.get(path); + if (!cur || !prev) continue; // added or removed -> 0 + if (cur.hash && cur.hash === prev.hash) { + total += 1; + continue; + } + if (!cur.isBinary && prev.content !== null) { + total += lineSimilarity(prev.content, cur.content); + } + // changed binary -> 0 + } + return Math.round((total / paths.size) * 100); +} + type ParseResult = | { ok: true; files: ParsedFile[] } | { ok: false; status: number; message: string }; @@ -107,7 +308,7 @@ function parseScanInput(input: CreateScanInput): ParseResult { } else { if (!input.text || !input.text.trim()) return { ok: false, status: 400, message: "Text fehlt." }; - files = [parseText(input.text, input.name ?? 
"SKILL.md")]; + files = [parseText(input.text)]; } if (files.length === 0) return { @@ -133,6 +334,14 @@ async function persistScan( files: ParsedFile[], result: EngineResult, ): Promise<{ scan: Scan; files: ScanFile[]; findings: Finding[] }> { + const fingerprint = computeFingerprint( + files.map((f) => ({ path: f.path, hash: f.hash })), + ); + // Determine relation against the existing database BEFORE inserting the new + // scan so the comparison excludes this scan itself. The skill is always + // re-scanned; identical uploads are stored as duplicates. + const relationInfo = await computeRelation(fingerprint, files); + const [scan] = await db .insert(scansTable) .values({ @@ -146,6 +355,10 @@ async function persistScan( aiError: result.aiError, findingCounts: result.counts, checkpoints: result.checkpoints, + fingerprint, + relation: relationInfo.relation, + similarity: relationInfo.similarity, + comparedScanId: relationInfo.comparedScanId, }) .returning(); @@ -160,6 +373,8 @@ async function persistScan( kind: f.kind, language: f.language, size: f.size, + hash: f.hash, + content: f.isBinary ? 
null : f.content, })), ) .returning(); @@ -224,7 +439,7 @@ router.post("/scans", async (req, res) => { return res .status(201) - .json(GetScanResponse.parse(serializeScanDetail(scan, insertedFiles, findings))); + .json(GetScanResponse.parse(await buildScanDetail(scan, insertedFiles, findings))); }); const STREAM_PACING_MS = 80; @@ -331,7 +546,97 @@ router.get("/scans/:id", async (req, res) => { .where(eq(findingsTable.scanId, scan.id)) .orderBy(findingsTable.id); - return res.json(GetScanResponse.parse(serializeScanDetail(scan, files, findings))); + return res.json(GetScanResponse.parse(await buildScanDetail(scan, files, findings))); +}); + +router.get("/scans/:id/compare/:otherId", async (req, res) => { + const params = CompareScansParams.safeParse(req.params); + if (!params.success) + return res.status(400).json({ message: "Ungültige ID" }); + + const { id, otherId } = params.data; + + const [current] = await db + .select() + .from(scansTable) + .where(eq(scansTable.id, id)); + const [previous] = await db + .select() + .from(scansTable) + .where(eq(scansTable.id, otherId)); + + if (!current || !previous) + return res.status(404).json({ message: "Scan nicht gefunden" }); + + const [currentFiles, previousFiles] = await Promise.all([ + db.select().from(scanFilesTable).where(eq(scanFilesTable.scanId, id)), + db.select().from(scanFilesTable).where(eq(scanFilesTable.scanId, otherId)), + ]); + + const currentByPath = new Map(currentFiles.map((f) => [f.path, f])); + const previousByPath = new Map(previousFiles.map((f) => [f.path, f])); + const paths = Array.from( + new Set([...currentByPath.keys(), ...previousByPath.keys()]), + ).sort((a, b) => (a < b ? -1 : a > b ? 1 : 0)); + + const fileDiffs = paths.map((path) => { + const cur = currentByPath.get(path) ?? null; + const prev = previousByPath.get(path) ?? 
null; + + let status: "unchanged" | "modified" | "added" | "removed"; + if (cur && !prev) status = "added"; + else if (!cur && prev) status = "removed"; + else if (cur && prev && cur.hash === prev.hash) status = "unchanged"; + else status = "modified"; + + let diff: + | { + type: "context" | "add" | "remove"; + text: string; + previousLine: number | null; + currentLine: number | null; + }[] + | null = null; + if ( + status === "modified" && + cur?.content !== null && + cur?.content !== undefined && + prev?.content !== null && + prev?.content !== undefined + ) { + diff = lineDiff(prev.content, cur.content); + } + + return { + path, + status, + previousHash: prev?.hash ?? null, + currentHash: cur?.hash ?? null, + previousSize: prev?.size ?? null, + currentSize: cur?.size ?? null, + previousHasContent: prev ? prev.content !== null : null, + currentHasContent: cur ? cur.content !== null : null, + lineDiff: diff, + }; + }); + + const side = (s: Scan) => ({ + id: s.id, + name: s.name, + verdict: s.verdict, + riskScore: s.riskScore, + fileCount: s.fileCount, + fingerprint: s.fingerprint, + createdAt: s.createdAt.toISOString(), + }); + + return res.json( + CompareScansResponse.parse({ + current: side(current), + previous: side(previous), + files: fileDiffs, + }), + ); }); router.delete("/scans/:id", async (req, res) => { diff --git a/artifacts/skillguard/src/App.tsx b/artifacts/skillguard/src/App.tsx index 5b7b007..0996547 100644 --- a/artifacts/skillguard/src/App.tsx +++ b/artifacts/skillguard/src/App.tsx @@ -8,6 +8,7 @@ import NotFound from "@/pages/not-found"; import Dashboard from "@/pages/dashboard"; import ScanForm from "@/pages/scan-form"; import ScanReport from "@/pages/scan-report"; +import ScanCompare from "@/pages/scan-compare"; import ScanHistory from "@/pages/scan-history"; import Admin from "@/pages/admin"; @@ -20,6 +21,7 @@ function Router() { + diff --git a/artifacts/skillguard/src/components/ui-helpers.tsx 
b/artifacts/skillguard/src/components/ui-helpers.tsx index 04b137c..23c3a35 100644 --- a/artifacts/skillguard/src/components/ui-helpers.tsx +++ b/artifacts/skillguard/src/components/ui-helpers.tsx @@ -1,5 +1,5 @@ import { Badge } from "@/components/ui/badge"; -import { ShieldCheck, ShieldAlert, Shield, AlertTriangle, Info, AlertCircle, AlertOctagon, CheckCircle2, MinusCircle, XCircle } from "lucide-react"; +import { ShieldCheck, ShieldAlert, Shield, AlertTriangle, Info, AlertCircle, AlertOctagon, CheckCircle2, MinusCircle, XCircle, Sparkles, Copy, GitBranch } from "lucide-react"; export const CHECKPOINT_STATUS_LABELS: Record = { pass: "Unauffällig", @@ -60,3 +60,22 @@ export function AxisBadge({ axis, className }: { axis: string, className?: strin Datenschutz ); } + +export const RELATION_LABELS: Record = { + new: "Neu", + identical: "Identisch", + modified: "Verändert", +}; + +export function RelationBadge({ relation, className }: { relation: string | null | undefined, className?: string }) { + switch (relation) { + case "new": + return Neu; + case "identical": + return Identisch; + case "modified": + return Verändert; + default: + return Unbekannt; + } +} diff --git a/artifacts/skillguard/src/pages/scan-compare.tsx b/artifacts/skillguard/src/pages/scan-compare.tsx new file mode 100644 index 0000000..57186d3 --- /dev/null +++ b/artifacts/skillguard/src/pages/scan-compare.tsx @@ -0,0 +1,217 @@ +import { useState } from "react"; +import { useRoute, Link } from "wouter"; +import { useCompareScans, getCompareScansQueryKey } from "@workspace/api-client-react"; +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card"; +import { Badge } from "@/components/ui/badge"; +import { Skeleton } from "@/components/ui/skeleton"; +import { Button } from "@/components/ui/button"; +import { VerdictBadge } from "@/components/ui-helpers"; +import { formatDate } from "@/lib/format"; +import { ShieldQuestion, ArrowLeft, FileCode, ChevronDown, 
ChevronRight } from "lucide-react"; +import type { ScanComparisonSide, ScanFileDiff } from "@workspace/api-client-react"; + +const STATUS_LABELS: Record = { + unchanged: "Unverändert", + modified: "Geändert", + added: "Neu", + removed: "Entfernt", +}; + +function StatusBadge({ status }: { status: string }) { + switch (status) { + case "unchanged": + return Unverändert; + case "modified": + return Geändert; + case "added": + return Neu; + case "removed": + return Entfernt; + default: + return {status}; + } +} + +function SkillSummaryCard({ side, label }: { side: ScanComparisonSide; label: string }) { + return ( + + + {label} + + + {side.name || `Scan #${side.id}`} + + + + + +
+ Risiko-Score + {side.riskScore} / 100 +
+
+ Dateien + {side.fileCount} +
+
+ Erstellt + {formatDate(side.createdAt)} +
+
+ Fingerprint + + {side.fingerprint ? `${side.fingerprint.slice(0, 24)}…` : "-"} + +
+
+
+ ); +} + +function FileDiffRow({ file }: { file: ScanFileDiff }) { + const [open, setOpen] = useState(false); + const canExpand = file.status === "modified" && file.lineDiff && file.lineDiff.length > 0; + + return ( +
+ + + {open && canExpand && ( +
+ + + {file.lineDiff!.map((line, i) => { + const bg = + line.type === "add" ? "bg-emerald-950/60" : + line.type === "remove" ? "bg-rose-950/60" : ""; + const sign = line.type === "add" ? "+" : line.type === "remove" ? "-" : " "; + const textColor = + line.type === "add" ? "text-emerald-300" : + line.type === "remove" ? "text-rose-300" : "text-slate-400"; + return ( + + + + + + + ); + })} + +
{line.previousLine ?? ""}{line.currentLine ?? ""}{sign}{line.text || " "}
+
+ )} +
+ ); +} + +export default function ScanCompare() { + const [, params] = useRoute("/vergleich/:id/:otherId"); + const id = Number(params?.id); + const otherId = Number(params?.otherId); + const valid = Number.isFinite(id) && Number.isFinite(otherId) && id > 0 && otherId > 0; + + const { data, isLoading, error } = useCompareScans(id, otherId, { + query: { + enabled: valid, + queryKey: getCompareScansQueryKey(id, otherId), + }, + }); + + if (isLoading || (!data && !error && valid)) { + return ( +
+ +
+ + +
+ +
+ ); + } + + if (error || !data || !valid) { + return ( +
+ +

Vergleich nicht möglich

+

Einer der beiden Scans existiert nicht oder konnte nicht geladen werden.

+
+ ); + } + + const counts = data.files.reduce( + (acc, f) => { + acc[f.status] = (acc[f.status] ?? 0) + 1; + return acc; + }, + {} as Record, + ); + + return ( +
+
+ +

Skill-Vergleich

+

+ Gegenüberstellung des ursprünglich gespeicherten Skills und der aktuell geprüften Variante – inklusive Datei-Status und zeilenweisem Diff. +

+
+ +
+ + +
+ + + + Datei-Vergleich + + {(["unchanged", "modified", "added", "removed"] as const).map((s) => + counts[s] ? ( + + + {counts[s]} + + ) : null, + )} + + + +
+ {data.files.length === 0 ? ( +
Keine Dateien zum Vergleichen.
+ ) : ( + data.files.map((file) => ) + )} +
+

+ Geänderte Textdateien lassen sich aufklappen, um den zeilenweisen Unterschied anzuzeigen. +

+
+
+
+ ); +} diff --git a/artifacts/skillguard/src/pages/scan-history.tsx b/artifacts/skillguard/src/pages/scan-history.tsx index 19c9665..322b8b2 100644 --- a/artifacts/skillguard/src/pages/scan-history.tsx +++ b/artifacts/skillguard/src/pages/scan-history.tsx @@ -6,7 +6,7 @@ import { Badge } from "@/components/ui/badge"; import { Skeleton } from "@/components/ui/skeleton"; import { Button } from "@/components/ui/button"; import { AlertDialog, AlertDialogAction, AlertDialogCancel, AlertDialogContent, AlertDialogDescription, AlertDialogFooter, AlertDialogHeader, AlertDialogTitle, AlertDialogTrigger } from "@/components/ui/alert-dialog"; -import { VerdictBadge } from "@/components/ui-helpers"; +import { VerdictBadge, RelationBadge } from "@/components/ui-helpers"; import { formatDate } from "@/lib/format"; import { Search, Trash2, ArrowRight } from "lucide-react"; import { useToast } from "@/hooks/use-toast"; @@ -65,9 +65,10 @@ export default function ScanHistory() {
-
+
{scan.name || `Scan #${scan.id}`} + {scan.relation && scan.relation !== "new" && }
{formatDate(scan.createdAt)} diff --git a/artifacts/skillguard/src/pages/scan-report.tsx b/artifacts/skillguard/src/pages/scan-report.tsx index a6a215c..25c33a8 100644 --- a/artifacts/skillguard/src/pages/scan-report.tsx +++ b/artifacts/skillguard/src/pages/scan-report.tsx @@ -1,5 +1,5 @@ import { useState, useMemo } from "react"; -import { useRoute } from "wouter"; +import { useRoute, Link } from "wouter"; import { useGetScan, getGetScanQueryKey } from "@workspace/api-client-react"; import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card"; import { Badge } from "@/components/ui/badge"; @@ -9,9 +9,9 @@ import { Tabs, TabsList, TabsTrigger, TabsContent } from "@/components/ui/tabs"; import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select"; import { Button } from "@/components/ui/button"; import { Progress } from "@/components/ui/progress"; -import { VerdictBadge, SeverityBadge, AxisBadge, CheckpointStatusBadge, CHECKPOINT_STATUS_LABELS } from "@/components/ui-helpers"; +import { VerdictBadge, SeverityBadge, AxisBadge, CheckpointStatusBadge, CHECKPOINT_STATUS_LABELS, RelationBadge } from "@/components/ui-helpers"; import { formatDate } from "@/lib/format"; -import { ShieldQuestion, AlertTriangle, Download, FileCode, CheckCircle2, Code, Shield, FileDown, ListChecks } from "lucide-react"; +import { ShieldQuestion, AlertTriangle, Download, FileCode, CheckCircle2, Code, Shield, FileDown, ListChecks, Fingerprint, GitCompare, History } from "lucide-react"; import type { ScanDetail } from "@workspace/api-client-react"; export default function ScanReport() { @@ -207,6 +207,64 @@ export default function ScanReport() {
+ + + + Skill-Fingerprint + + + Eindeutiger Erkennungswert dieses Skills. Identische und veränderte Versionen werden anhand des Fingerprints erkannt. + + + +
+ + {data.relation === "modified" && data.similarity != null && ( + {data.similarity}% ähnlich + )} + + + {data.checkCount === 1 + ? "Erstmals geprüft" + : `${data.checkCount}-mal geprüft (gleicher Fingerprint)`} + +
+ +
+ Fingerprint + + {data.fingerprint || "-"} + +
+ + {data.comparedScan && ( +
+
+ + {data.relation === "identical" ? "Identisch zu" : "Ähnlichster bekannter Skill"} + +
+ + {data.comparedScan.name || `Scan #${data.comparedScan.id}`} + + + · + Risiko {data.comparedScan.riskScore} / 100 + · + {formatDate(data.comparedScan.createdAt)} +
+
+ +
+ )} +
+
+ Auffälligkeiten ({data.findings.length}) @@ -371,13 +429,14 @@ export default function ScanReport() { Pfad Typ Sprache + Hash (SHA-256) Größe {data.files.length === 0 ? ( - Keine Dateien verfügbar. + Keine Dateien verfügbar. ) : ( data.files.map((file, i) => ( @@ -389,6 +448,12 @@ export default function ScanReport() { {file.language || "-"} + + {file.hash ? file.hash.slice(0, 12) : "-"} + {!file.hasContent && ( + binär + )} + {file.size} B )) diff --git a/lib/api-client-react/src/generated/api.schemas.ts b/lib/api-client-react/src/generated/api.schemas.ts index a8a222a..86dccc8 100644 --- a/lib/api-client-react/src/generated/api.schemas.ts +++ b/lib/api-client-react/src/generated/api.schemas.ts @@ -74,6 +74,19 @@ export const ScanVerdict = { block: 'block', } as const; +/** + * Relation to previously stored skills + * @nullable + */ +export type ScanRelation = typeof ScanRelation[keyof typeof ScanRelation] | null; + + +export const ScanRelation = { + new: 'new', + identical: 'identical', + modified: 'modified', +} as const; + export interface FindingCounts { critical: number; high: number; @@ -97,6 +110,23 @@ export interface Scan { /** @nullable */ aiError?: string | null; findingCounts: FindingCounts; + /** Deterministic hash over all files (path + per-file hash) */ + fingerprint: string; + /** + * Relation to previously stored skills + * @nullable + */ + relation: ScanRelation; + /** + * Content-aware similarity (0-100) to the compared skill (identical files count fully, changed text files use line-level similarity) + * @nullable + */ + similarity: number | null; + /** + * The scan this one was compared against, if any + * @nullable + */ + comparedScanId: number | null; createdAt: string; } @@ -179,6 +209,10 @@ export interface ScanFile { /** @nullable */ language?: string | null; size: number; + /** SHA-256 hash of the file content */ + hash: string; + /** Whether the text content was stored (false for binary files) */ + hasContent: boolean; } export type 
FindingAxis = typeof FindingAxis[keyof typeof FindingAxis]; @@ -226,11 +260,102 @@ export interface Finding { detectedBy: FindingDetectedBy; } -export type ScanDetail = Scan & { +export type ComparedScanVerdict = typeof ComparedScanVerdict[keyof typeof ComparedScanVerdict]; + + +export const ComparedScanVerdict = { + pass: 'pass', + review: 'review', + block: 'block', +} as const; + +export interface ComparedScan { + id: number; + name: string; + verdict: ComparedScanVerdict; + riskScore: number; + createdAt: string; +} + +export type ScanDetail = Scan & ({ files: ScanFile[]; findings: Finding[]; checkpoints: ScanCheckpoint[]; -}; + /** How often a skill with this exact fingerprint was scanned */ + checkCount: number; + comparedScan: ComparedScan | null; +}); + +export type ScanComparisonSideVerdict = typeof ScanComparisonSideVerdict[keyof typeof ScanComparisonSideVerdict]; + + +export const ScanComparisonSideVerdict = { + pass: 'pass', + review: 'review', + block: 'block', +} as const; + +export interface ScanComparisonSide { + id: number; + name: string; + verdict: ScanComparisonSideVerdict; + riskScore: number; + fileCount: number; + fingerprint: string; + createdAt: string; +} + +export type DiffLineType = typeof DiffLineType[keyof typeof DiffLineType]; + + +export const DiffLineType = { + context: 'context', + add: 'add', + remove: 'remove', +} as const; + +export interface DiffLine { + type: DiffLineType; + text: string; + /** @nullable */ + previousLine: number | null; + /** @nullable */ + currentLine: number | null; +} + +export type ScanFileDiffStatus = typeof ScanFileDiffStatus[keyof typeof ScanFileDiffStatus]; + + +export const ScanFileDiffStatus = { + unchanged: 'unchanged', + modified: 'modified', + added: 'added', + removed: 'removed', +} as const; + +export interface ScanFileDiff { + path: string; + status: ScanFileDiffStatus; + /** @nullable */ + previousHash: string | null; + /** @nullable */ + currentHash: string | null; + /** @nullable */ + 
previousSize: number | null; + /** @nullable */ + currentSize: number | null; + /** @nullable */ + previousHasContent: boolean | null; + /** @nullable */ + currentHasContent: boolean | null; + lineDiff: DiffLine[] | null; +} + +export interface ScanComparison { + current: ScanComparisonSide; + previous: ScanComparisonSide; + files: ScanFileDiff[]; +} export type AiProviderApiType = typeof AiProviderApiType[keyof typeof AiProviderApiType]; diff --git a/lib/api-client-react/src/generated/api.ts b/lib/api-client-react/src/generated/api.ts index e0c5bce..69a0938 100644 --- a/lib/api-client-react/src/generated/api.ts +++ b/lib/api-client-react/src/generated/api.ts @@ -33,6 +33,7 @@ import type { Rule, RuleUpdate, Scan, + ScanComparison, ScanDetail, SkillScanInput } from './api.schemas'; @@ -354,6 +355,89 @@ export const useCreateScan = , return useMutation(getCreateScanMutationOptions(options)); } +export const getCompareScansUrl = (id: number, + otherId: number,) => { + + + + + return `/api/scans/${id}/compare/${otherId}` +} + +/** + * Returns a file-level diff between the current scan (id) and a previously stored scan (otherId), including line-by-line diffs for modified text files. + * @summary Compare two scans on the file level + */ +export const compareScans = async (id: number, + otherId: number, options?: RequestInit): Promise => { + + return customFetch(getCompareScansUrl(id,otherId), + { + ...options, + method: 'GET' + + + } +);} + + + + + +export const getCompareScansQueryKey = (id: number, + otherId: number,) => { + return [ + `/api/scans/${id}/compare/${otherId}` + ] as const; + } + + +export const getCompareScansQueryOptions = >, TError = ErrorType>(id: number, + otherId: number, options?: { query?:UseQueryOptions>, TError, TData>, request?: SecondParameter} +) => { + +const {query: queryOptions, request: requestOptions} = options ?? {}; + + const queryKey = queryOptions?.queryKey ?? 
getCompareScansQueryKey(id,otherId); + + + + const queryFn: QueryFunction>> = ({ signal }) => compareScans(id,otherId, { signal, ...requestOptions }); + + + + + + return { queryKey, queryFn, enabled: !!(id && otherId), ...queryOptions} as UseQueryOptions>, TError, TData> & { queryKey: QueryKey } +} + +export type CompareScansQueryResult = NonNullable>> +export type CompareScansQueryError = ErrorType + + +/** + * @summary Compare two scans on the file level + */ + +export function useCompareScans>, TError = ErrorType>( + id: number, + otherId: number, options?: { query?:UseQueryOptions>, TError, TData>, request?: SecondParameter} + + ): UseQueryResult & { queryKey: QueryKey } { + + const queryOptions = getCompareScansQueryOptions(id,otherId,options) + + const query = useQuery(queryOptions) as UseQueryResult & { queryKey: QueryKey }; + + return { ...query, queryKey: queryOptions.queryKey }; +} + + + + + + + export const getGetScanUrl = (id: number,) => { diff --git a/lib/api-spec/openapi.yaml b/lib/api-spec/openapi.yaml index e8b92e1..c7eeeb6 100644 --- a/lib/api-spec/openapi.yaml +++ b/lib/api-spec/openapi.yaml @@ -91,6 +91,40 @@ paths: schema: $ref: "#/components/schemas/ApiError" + /scans/{id}/compare/{otherId}: + get: + operationId: compareScans + tags: [scans] + summary: Compare two scans on the file level + description: >- + Returns a file-level diff between the current scan (id) and a previously + stored scan (otherId), including line-by-line diffs for modified text + files. 
+ parameters: + - name: id + in: path + required: true + schema: + type: integer + - name: otherId + in: path + required: true + schema: + type: integer + responses: + "200": + description: File-level comparison + content: + application/json: + schema: + $ref: "#/components/schemas/ScanComparison" + "404": + description: Not found + content: + application/json: + schema: + $ref: "#/components/schemas/ApiError" + /scans/{id}: get: operationId: getScan @@ -369,6 +403,10 @@ components: - fileCount - aiUsed - findingCounts + - fingerprint + - relation + - similarity + - comparedScanId - createdAt properties: id: @@ -394,6 +432,19 @@ components: type: ["string", "null"] findingCounts: $ref: "#/components/schemas/FindingCounts" + fingerprint: + type: string + description: Deterministic hash over all files (path + per-file hash) + relation: + type: ["string", "null"] + enum: [new, identical, modified, null] + description: Relation to previously stored skills + similarity: + type: ["integer", "null"] + description: Content-aware similarity (0-100) to the compared skill (identical files count fully, changed text files use line-level similarity) + comparedScanId: + type: ["integer", "null"] + description: The scan this one was compared against, if any createdAt: type: string @@ -450,7 +501,7 @@ components: ScanFile: type: object - required: [path, kind, size] + required: [path, kind, size, hash, hasContent] properties: path: type: string @@ -461,6 +512,12 @@ components: type: ["string", "null"] size: type: integer + hash: + type: string + description: SHA-256 hash of the file content + hasContent: + type: boolean + description: Whether the text content was stored (false for binary files) Finding: type: object @@ -503,7 +560,7 @@ components: allOf: - $ref: "#/components/schemas/Scan" - type: object - required: [files, findings, checkpoints] + required: [files, findings, checkpoints, checkCount, comparedScan] properties: files: type: array @@ -517,6 +574,113 @@ components: 
type: array items: $ref: "#/components/schemas/ScanCheckpoint" + checkCount: + type: integer + description: How often a skill with this exact fingerprint was scanned + comparedScan: + oneOf: + - $ref: "#/components/schemas/ComparedScan" + - type: "null" + + ComparedScan: + type: object + required: [id, name, verdict, riskScore, createdAt] + properties: + id: + type: integer + name: + type: string + verdict: + type: string + enum: [pass, review, block] + riskScore: + type: integer + createdAt: + type: string + + ScanComparisonSide: + type: object + required: [id, name, verdict, riskScore, fileCount, fingerprint, createdAt] + properties: + id: + type: integer + name: + type: string + verdict: + type: string + enum: [pass, review, block] + riskScore: + type: integer + fileCount: + type: integer + fingerprint: + type: string + createdAt: + type: string + + DiffLine: + type: object + required: [type, text, previousLine, currentLine] + properties: + type: + type: string + enum: [context, add, remove] + text: + type: string + previousLine: + type: ["integer", "null"] + currentLine: + type: ["integer", "null"] + + ScanFileDiff: + type: object + required: + - path + - status + - previousHash + - currentHash + - previousSize + - currentSize + - previousHasContent + - currentHasContent + - lineDiff + properties: + path: + type: string + status: + type: string + enum: [unchanged, modified, added, removed] + previousHash: + type: ["string", "null"] + currentHash: + type: ["string", "null"] + previousSize: + type: ["integer", "null"] + currentSize: + type: ["integer", "null"] + previousHasContent: + type: ["boolean", "null"] + currentHasContent: + type: ["boolean", "null"] + lineDiff: + oneOf: + - type: array + items: + $ref: "#/components/schemas/DiffLine" + - type: "null" + + ScanComparison: + type: object + required: [current, previous, files] + properties: + current: + $ref: "#/components/schemas/ScanComparisonSide" + previous: + $ref: 
"#/components/schemas/ScanComparisonSide" + files: + type: array + items: + $ref: "#/components/schemas/ScanFileDiff" AiProvider: type: object diff --git a/lib/api-zod/src/generated/api.ts b/lib/api-zod/src/generated/api.ts index 6703cf4..aecfec3 100644 --- a/lib/api-zod/src/generated/api.ts +++ b/lib/api-zod/src/generated/api.ts @@ -60,6 +60,10 @@ export const GetDashboardResponse = zod.object({ "privacy": zod.number(), "total": zod.number() }), + "fingerprint": zod.string().describe('Deterministic hash over all files (path + per-file hash)'), + "relation": zod.union([zod.literal('new'),zod.literal('identical'),zod.literal('modified'),zod.literal(null)]).nullable().describe('Relation to previously stored skills'), + "similarity": zod.number().nullable().describe('Content-aware similarity (0-100) to the compared skill (identical files count fully, changed text files use line-level similarity)'), + "comparedScanId": zod.number().nullable().describe('The scan this one was compared against, if any'), "createdAt": zod.string() })), "topRules": zod.array(zod.object({ @@ -94,6 +98,10 @@ export const ListScansResponseItem = zod.object({ "privacy": zod.number(), "total": zod.number() }), + "fingerprint": zod.string().describe('Deterministic hash over all files (path + per-file hash)'), + "relation": zod.union([zod.literal('new'),zod.literal('identical'),zod.literal('modified'),zod.literal(null)]).nullable().describe('Relation to previously stored skills'), + "similarity": zod.number().nullable().describe('Content-aware similarity (0-100) to the compared skill (identical files count fully, changed text files use line-level similarity)'), + "comparedScanId": zod.number().nullable().describe('The scan this one was compared against, if any'), "createdAt": zod.string() }) export const ListScansResponse = zod.array(ListScansResponseItem) @@ -113,6 +121,53 @@ export const CreateScanBody = zod.object({ }) +/** + * Returns a file-level diff between the current scan (id) and a 
previously stored scan (otherId), including line-by-line diffs for modified text files. + * @summary Compare two scans on the file level + */ +export const CompareScansParams = zod.object({ + "id": zod.coerce.number(), + "otherId": zod.coerce.number() +}) + +export const CompareScansResponse = zod.object({ + "current": zod.object({ + "id": zod.number(), + "name": zod.string(), + "verdict": zod.enum(['pass', 'review', 'block']), + "riskScore": zod.number(), + "fileCount": zod.number(), + "fingerprint": zod.string(), + "createdAt": zod.string() +}), + "previous": zod.object({ + "id": zod.number(), + "name": zod.string(), + "verdict": zod.enum(['pass', 'review', 'block']), + "riskScore": zod.number(), + "fileCount": zod.number(), + "fingerprint": zod.string(), + "createdAt": zod.string() +}), + "files": zod.array(zod.object({ + "path": zod.string(), + "status": zod.enum(['unchanged', 'modified', 'added', 'removed']), + "previousHash": zod.string().nullable(), + "currentHash": zod.string().nullable(), + "previousSize": zod.number().nullable(), + "currentSize": zod.number().nullable(), + "previousHasContent": zod.boolean().nullable(), + "currentHasContent": zod.boolean().nullable(), + "lineDiff": zod.union([zod.array(zod.object({ + "type": zod.enum(['context', 'add', 'remove']), + "text": zod.string(), + "previousLine": zod.number().nullable(), + "currentLine": zod.number().nullable() +})),zod.null()]) +})) +}) + + /** * @summary Get a scan report with findings */ @@ -140,13 +195,19 @@ export const GetScanResponse = zod.object({ "privacy": zod.number(), "total": zod.number() }), + "fingerprint": zod.string().describe('Deterministic hash over all files (path + per-file hash)'), + "relation": zod.union([zod.literal('new'),zod.literal('identical'),zod.literal('modified'),zod.literal(null)]).nullable().describe('Relation to previously stored skills'), + "similarity": zod.number().nullable().describe('Content-aware similarity (0-100) to the compared skill (identical files 
count fully, changed text files use line-level similarity)'), + "comparedScanId": zod.number().nullable().describe('The scan this one was compared against, if any'), "createdAt": zod.string() }).and(zod.object({ "files": zod.array(zod.object({ "path": zod.string(), "kind": zod.enum(['instruction', 'script', 'resource']), "language": zod.string().nullish(), - "size": zod.number() + "size": zod.number(), + "hash": zod.string().describe('SHA-256 hash of the file content'), + "hasContent": zod.boolean().describe('Whether the text content was stored (false for binary files)') })), "findings": zod.array(zod.object({ "id": zod.number(), @@ -171,7 +232,15 @@ export const GetScanResponse = zod.object({ "findingCount": zod.number(), "scoreDelta": zod.number(), "detectedBy": zod.union([zod.literal('static'),zod.literal('ai'),zod.literal(null)]).nullish() -}).describe('A single inspection step (Prüfschritt) with its partial assessment (Teilbewertung).')) +}).describe('A single inspection step (Prüfschritt) with its partial assessment (Teilbewertung).')), + "checkCount": zod.number().describe('How often a skill with this exact fingerprint was scanned'), + "comparedScan": zod.union([zod.object({ + "id": zod.number(), + "name": zod.string(), + "verdict": zod.enum(['pass', 'review', 'block']), + "riskScore": zod.number(), + "createdAt": zod.string() +}),zod.null()]) })) diff --git a/lib/api-zod/src/generated/types/comparedScan.ts b/lib/api-zod/src/generated/types/comparedScan.ts new file mode 100644 index 0000000..9adf6a9 --- /dev/null +++ b/lib/api-zod/src/generated/types/comparedScan.ts @@ -0,0 +1,16 @@ +/** + * Generated by orval v8.9.1 🍺 + * Do not edit manually. 
+ * Api + * API specification + * OpenAPI spec version: 0.1.0 + */ +import type { ComparedScanVerdict } from './comparedScanVerdict'; + +export interface ComparedScan { + id: number; + name: string; + verdict: ComparedScanVerdict; + riskScore: number; + createdAt: string; +} diff --git a/lib/api-zod/src/generated/types/comparedScanVerdict.ts b/lib/api-zod/src/generated/types/comparedScanVerdict.ts new file mode 100644 index 0000000..c04920b --- /dev/null +++ b/lib/api-zod/src/generated/types/comparedScanVerdict.ts @@ -0,0 +1,16 @@ +/** + * Generated by orval v8.9.1 🍺 + * Do not edit manually. + * Api + * API specification + * OpenAPI spec version: 0.1.0 + */ + +export type ComparedScanVerdict = typeof ComparedScanVerdict[keyof typeof ComparedScanVerdict]; + + +export const ComparedScanVerdict = { + pass: 'pass', + review: 'review', + block: 'block', +} as const; diff --git a/lib/api-zod/src/generated/types/diffLine.ts b/lib/api-zod/src/generated/types/diffLine.ts new file mode 100644 index 0000000..ee752a0 --- /dev/null +++ b/lib/api-zod/src/generated/types/diffLine.ts @@ -0,0 +1,17 @@ +/** + * Generated by orval v8.9.1 🍺 + * Do not edit manually. + * Api + * API specification + * OpenAPI spec version: 0.1.0 + */ +import type { DiffLineType } from './diffLineType'; + +export interface DiffLine { + type: DiffLineType; + text: string; + /** @nullable */ + previousLine: number | null; + /** @nullable */ + currentLine: number | null; +} diff --git a/lib/api-zod/src/generated/types/diffLineType.ts b/lib/api-zod/src/generated/types/diffLineType.ts new file mode 100644 index 0000000..573427b --- /dev/null +++ b/lib/api-zod/src/generated/types/diffLineType.ts @@ -0,0 +1,16 @@ +/** + * Generated by orval v8.9.1 🍺 + * Do not edit manually. 
+ * Api + * API specification + * OpenAPI spec version: 0.1.0 + */ + +export type DiffLineType = typeof DiffLineType[keyof typeof DiffLineType]; + + +export const DiffLineType = { + context: 'context', + add: 'add', + remove: 'remove', +} as const; diff --git a/lib/api-zod/src/generated/types/index.ts b/lib/api-zod/src/generated/types/index.ts index 77e8127..297d4de 100644 --- a/lib/api-zod/src/generated/types/index.ts +++ b/lib/api-zod/src/generated/types/index.ts @@ -14,7 +14,11 @@ export * from './aiProviderUpdate'; export * from './aiProviderUpdateApiType'; export * from './apiError'; export * from './axisTotals'; +export * from './comparedScan'; +export * from './comparedScanVerdict'; export * from './dashboardSummary'; +export * from './diffLine'; +export * from './diffLineType'; export * from './finding'; export * from './findingAxis'; export * from './findingCounts'; @@ -40,9 +44,15 @@ export * from './scanCheckpointAxis'; export * from './scanCheckpointDetectedBy'; export * from './scanCheckpointSeverity'; export * from './scanCheckpointStatus'; +export * from './scanComparison'; +export * from './scanComparisonSide'; +export * from './scanComparisonSideVerdict'; export * from './scanDetail'; export * from './scanFile'; +export * from './scanFileDiff'; +export * from './scanFileDiffStatus'; export * from './scanFileKind'; +export * from './scanRelation'; export * from './scanSource'; export * from './scanStatus'; export * from './scanVerdict'; diff --git a/lib/api-zod/src/generated/types/scan.ts b/lib/api-zod/src/generated/types/scan.ts index 536b3d7..78004d2 100644 --- a/lib/api-zod/src/generated/types/scan.ts +++ b/lib/api-zod/src/generated/types/scan.ts @@ -6,6 +6,7 @@ * OpenAPI spec version: 0.1.0 */ import type { FindingCounts } from './findingCounts'; +import type { ScanRelation } from './scanRelation'; import type { ScanSource } from './scanSource'; import type { ScanStatus } from './scanStatus'; import type { ScanVerdict } from './scanVerdict'; @@ 
-22,5 +23,22 @@ export interface Scan { /** @nullable */ aiError?: string | null; findingCounts: FindingCounts; + /** Deterministic hash over all files (path + per-file hash) */ + fingerprint: string; + /** + * Relation to previously stored skills + * @nullable + */ + relation: ScanRelation; + /** + * Content-aware similarity (0-100) to the compared skill (identical files count fully, changed text files use line-level similarity) + * @nullable + */ + similarity: number | null; + /** + * The scan this one was compared against, if any + * @nullable + */ + comparedScanId: number | null; createdAt: string; } diff --git a/lib/api-zod/src/generated/types/scanComparison.ts b/lib/api-zod/src/generated/types/scanComparison.ts new file mode 100644 index 0000000..2ab93d3 --- /dev/null +++ b/lib/api-zod/src/generated/types/scanComparison.ts @@ -0,0 +1,15 @@ +/** + * Generated by orval v8.9.1 🍺 + * Do not edit manually. + * Api + * API specification + * OpenAPI spec version: 0.1.0 + */ +import type { ScanComparisonSide } from './scanComparisonSide'; +import type { ScanFileDiff } from './scanFileDiff'; + +export interface ScanComparison { + current: ScanComparisonSide; + previous: ScanComparisonSide; + files: ScanFileDiff[]; +} diff --git a/lib/api-zod/src/generated/types/scanComparisonSide.ts b/lib/api-zod/src/generated/types/scanComparisonSide.ts new file mode 100644 index 0000000..4c92a0e --- /dev/null +++ b/lib/api-zod/src/generated/types/scanComparisonSide.ts @@ -0,0 +1,18 @@ +/** + * Generated by orval v8.9.1 🍺 + * Do not edit manually. 
+ * Api + * API specification + * OpenAPI spec version: 0.1.0 + */ +import type { ScanComparisonSideVerdict } from './scanComparisonSideVerdict'; + +export interface ScanComparisonSide { + id: number; + name: string; + verdict: ScanComparisonSideVerdict; + riskScore: number; + fileCount: number; + fingerprint: string; + createdAt: string; +} diff --git a/lib/api-zod/src/generated/types/scanComparisonSideVerdict.ts b/lib/api-zod/src/generated/types/scanComparisonSideVerdict.ts new file mode 100644 index 0000000..99c972f --- /dev/null +++ b/lib/api-zod/src/generated/types/scanComparisonSideVerdict.ts @@ -0,0 +1,16 @@ +/** + * Generated by orval v8.9.1 🍺 + * Do not edit manually. + * Api + * API specification + * OpenAPI spec version: 0.1.0 + */ + +export type ScanComparisonSideVerdict = typeof ScanComparisonSideVerdict[keyof typeof ScanComparisonSideVerdict]; + + +export const ScanComparisonSideVerdict = { + pass: 'pass', + review: 'review', + block: 'block', +} as const; diff --git a/lib/api-zod/src/generated/types/scanDetail.ts b/lib/api-zod/src/generated/types/scanDetail.ts index abd7914..a6ac57d 100644 --- a/lib/api-zod/src/generated/types/scanDetail.ts +++ b/lib/api-zod/src/generated/types/scanDetail.ts @@ -5,13 +5,17 @@ * API specification * OpenAPI spec version: 0.1.0 */ +import type { ComparedScan } from './comparedScan'; import type { Finding } from './finding'; import type { Scan } from './scan'; import type { ScanCheckpoint } from './scanCheckpoint'; import type { ScanFile } from './scanFile'; -export type ScanDetail = Scan & { +export type ScanDetail = Scan & ({ files: ScanFile[]; findings: Finding[]; checkpoints: ScanCheckpoint[]; -}; + /** How often a skill with this exact fingerprint was scanned */ + checkCount: number; + comparedScan: ComparedScan | null; +}); diff --git a/lib/api-zod/src/generated/types/scanFile.ts b/lib/api-zod/src/generated/types/scanFile.ts index 1a61b47..feeb72a 100644 --- a/lib/api-zod/src/generated/types/scanFile.ts +++ 
b/lib/api-zod/src/generated/types/scanFile.ts @@ -13,4 +13,8 @@ export interface ScanFile { /** @nullable */ language?: string | null; size: number; + /** SHA-256 hash of the file content */ + hash: string; + /** Whether the text content was stored (false for binary files) */ + hasContent: boolean; } diff --git a/lib/api-zod/src/generated/types/scanFileDiff.ts b/lib/api-zod/src/generated/types/scanFileDiff.ts new file mode 100644 index 0000000..988e827 --- /dev/null +++ b/lib/api-zod/src/generated/types/scanFileDiff.ts @@ -0,0 +1,27 @@ +/** + * Generated by orval v8.9.1 🍺 + * Do not edit manually. + * Api + * API specification + * OpenAPI spec version: 0.1.0 + */ +import type { DiffLine } from './diffLine'; +import type { ScanFileDiffStatus } from './scanFileDiffStatus'; + +export interface ScanFileDiff { + path: string; + status: ScanFileDiffStatus; + /** @nullable */ + previousHash: string | null; + /** @nullable */ + currentHash: string | null; + /** @nullable */ + previousSize: number | null; + /** @nullable */ + currentSize: number | null; + /** @nullable */ + previousHasContent: boolean | null; + /** @nullable */ + currentHasContent: boolean | null; + lineDiff: DiffLine[] | null; +} diff --git a/lib/api-zod/src/generated/types/scanFileDiffStatus.ts b/lib/api-zod/src/generated/types/scanFileDiffStatus.ts new file mode 100644 index 0000000..8e5f6fb --- /dev/null +++ b/lib/api-zod/src/generated/types/scanFileDiffStatus.ts @@ -0,0 +1,17 @@ +/** + * Generated by orval v8.9.1 🍺 + * Do not edit manually. 
+ * Api + * API specification + * OpenAPI spec version: 0.1.0 + */ + +export type ScanFileDiffStatus = typeof ScanFileDiffStatus[keyof typeof ScanFileDiffStatus]; + + +export const ScanFileDiffStatus = { + unchanged: 'unchanged', + modified: 'modified', + added: 'added', + removed: 'removed', +} as const; diff --git a/lib/api-zod/src/generated/types/scanRelation.ts b/lib/api-zod/src/generated/types/scanRelation.ts new file mode 100644 index 0000000..6e35cb3 --- /dev/null +++ b/lib/api-zod/src/generated/types/scanRelation.ts @@ -0,0 +1,20 @@ +/** + * Generated by orval v8.9.1 🍺 + * Do not edit manually. + * Api + * API specification + * OpenAPI spec version: 0.1.0 + */ + +/** + * Relation to previously stored skills + * @nullable + */ +export type ScanRelation = typeof ScanRelation[keyof typeof ScanRelation] | null; + + +export const ScanRelation = { + new: 'new', + identical: 'identical', + modified: 'modified', +} as const; diff --git a/lib/db/src/schema/scanFiles.ts b/lib/db/src/schema/scanFiles.ts index c696e6c..c327e77 100644 --- a/lib/db/src/schema/scanFiles.ts +++ b/lib/db/src/schema/scanFiles.ts @@ -10,6 +10,8 @@ export const scanFilesTable = pgTable("scan_files", { kind: text("kind").notNull(), language: text("language"), size: integer("size").notNull().default(0), + hash: text("hash").notNull().default(""), + content: text("content"), }); export type ScanFile = typeof scanFilesTable.$inferSelect; diff --git a/lib/db/src/schema/scans.ts b/lib/db/src/schema/scans.ts index c5b06e6..ed58be9 100644 --- a/lib/db/src/schema/scans.ts +++ b/lib/db/src/schema/scans.ts @@ -6,6 +6,7 @@ import { boolean, timestamp, jsonb, + index, } from "drizzle-orm/pg-core"; import { sql } from "drizzle-orm"; @@ -34,25 +35,37 @@ export type ScanCheckpoint = { detectedBy: "static" | "ai" | null; }; -export const scansTable = pgTable("scans", { - id: serial("id").primaryKey(), - name: text("name").notNull(), - source: text("source").notNull(), - status: 
text("status").notNull().default("completed"), - verdict: text("verdict").notNull().default("pass"), - riskScore: integer("risk_score").notNull().default(0), - fileCount: integer("file_count").notNull().default(0), - aiUsed: boolean("ai_used").notNull().default(false), - aiError: text("ai_error"), - findingCounts: jsonb("finding_counts").$type().notNull(), - checkpoints: jsonb("checkpoints") - .$type() - .notNull() - .default(sql`'[]'::jsonb`), - createdAt: timestamp("created_at", { withTimezone: true }) - .notNull() - .defaultNow(), -}); +export type ScanRelation = "new" | "identical" | "modified"; + +export const scansTable = pgTable( + "scans", + { + id: serial("id").primaryKey(), + name: text("name").notNull(), + source: text("source").notNull(), + status: text("status").notNull().default("completed"), + verdict: text("verdict").notNull().default("pass"), + riskScore: integer("risk_score").notNull().default(0), + fileCount: integer("file_count").notNull().default(0), + aiUsed: boolean("ai_used").notNull().default(false), + aiError: text("ai_error"), + findingCounts: jsonb("finding_counts").$type().notNull(), + checkpoints: jsonb("checkpoints") + .$type() + .notNull() + .default(sql`'[]'::jsonb`), + fingerprint: text("fingerprint").notNull().default(""), + relation: text("relation").$type(), + similarity: integer("similarity"), + comparedScanId: integer("compared_scan_id"), + createdAt: timestamp("created_at", { withTimezone: true }) + .notNull() + .defaultNow(), + }, + (t) => ({ + fingerprintIdx: index("scans_fingerprint_idx").on(t.fingerprint), + }), +); export type Scan = typeof scansTable.$inferSelect; export type InsertScan = typeof scansTable.$inferInsert;