// File: skillguard/artifacts/api-server/src/lib/scanEngine.ts

import { db } from "@workspace/db";
import {
rulesTable,
promptsTable,
aiProvidersTable,
type Prompt,
} from "@workspace/db";
import { eq } from "drizzle-orm";
import {
STATIC_RULES,
// Blame note (2026-06-10, Task #2): per-rule AI configuration.
AI_RULES,
runStaticRule,
type ParsedFile,
type RawFinding,
type Severity,
// Blame note (2026-06-10, Task #2): per-rule AI configuration.
type Axis,
} from "./ruleCatalog";
import type {
FindingCounts as DbFindingCounts,
ScanCheckpoint,
} from "@workspace/db";
// Blame note (2026-06-10, Task #2): per-rule AI configuration.
import { runAiAnalysis, type AiRuleConfig } from "./aiAnalysis";
export type { ScanCheckpoint } from "@workspace/db";
/**
 * Points a single finding contributes to a score, keyed by its severity.
 * The weights are summed per finding by `computeScore` (which caps the sum
 * at 100) and by `scoreOf` (uncapped).
 */
const SEVERITY_WEIGHT: Record<Severity, number> = {
critical: 50,
high: 18,
medium: 7,
low: 2,
// Informational findings are counted but never raise the score.
info: 0,
};
/**
 * Events passed to a progress callback while a scan is running:
 * - "ai-start": sent once when the AI stage is entered.
 * - "checkpoint": sent each time a rule's checkpoint has been recorded.
 */
export type ScanProgressEvent =
| { type: "ai-start" }
| { type: "checkpoint"; checkpoint: ScanCheckpoint };
/**
 * Progress callback signature. The callback may be synchronous or return a
 * promise; `analyzeSkill` awaits each call before continuing.
 */
export type ProgressFn = (
event: ScanProgressEvent,
) => void | Promise<void>;
/** Aggregate outcome of one scan as returned by `analyzeSkill`. */
export type EngineResult = {
// All findings: static findings first, followed by AI findings if the AI run succeeded.
findings: RawFinding[];
// Per-severity and per-axis tallies plus the total number of findings.
counts: DbFindingCounts;
// One entry per evaluated rule, in evaluation order.
checkpoints: ScanCheckpoint[];
// Sum of severity weights over all findings, capped at 100.
riskScore: number;
verdict: "pass" | "review" | "block";
// True only when the AI analysis was executed and reported no error.
aiUsed: boolean;
// Reason the AI stage did not run or failed; null otherwise.
aiError: string | null;
};
/**
 * Tallies findings by severity and by axis.
 *
 * @param findings Findings to count.
 * @returns A counter object with one slot per severity, one per axis, and
 *   `total` set to the number of findings.
 */
export function computeCounts(findings: RawFinding[]): DbFindingCounts {
  const tally: DbFindingCounts = {
    critical: 0,
    high: 0,
    medium: 0,
    low: 0,
    info: 0,
    security: 0,
    privacy: 0,
    total: findings.length,
  };

  // Every finding bumps exactly two slots: its severity and its axis.
  findings.forEach((finding) => {
    tally[finding.severity] += 1;
    tally[finding.axis] += 1;
  });

  return tally;
}
/**
 * Computes the overall risk score for a set of findings.
 *
 * @param findings Findings whose severity weights are added up.
 * @returns The summed weight, capped at 100.
 */
export function computeScore(findings: RawFinding[]): number {
  const uncapped = findings.reduce(
    (sum, finding) => sum + SEVERITY_WEIGHT[finding.severity],
    0,
  );
  return Math.min(100, uncapped);
}
/**
 * Derives the verdict from the findings and the risk score.
 *
 * - "block": at least one critical finding, or a score of 70 or more.
 * - "review": at least one high finding, or a score of 20 or more.
 * - "pass": everything else.
 *
 * @param findings Findings to inspect for critical/high severities.
 * @param score Risk score to compare against the thresholds.
 */
export function computeVerdict(
  findings: RawFinding[],
  score: number,
): "pass" | "review" | "block" {
  const seen = new Set(findings.map((finding) => finding.severity));

  if (seen.has("critical") || score >= 70) {
    return "block";
  }
  if (seen.has("high") || score >= 20) {
    return "review";
  }
  return "pass";
}
/**
 * Adds up the severity weights of the given findings without applying any cap.
 *
 * @param findings Findings to weigh.
 * @returns The plain sum of their weights (0 for an empty list).
 */
function scoreOf(findings: RawFinding[]): number {
  let total = 0;
  for (const finding of findings) {
    total += SEVERITY_WEIGHT[finding.severity];
  }
  return total;
}
/**
 * Scans a parsed skill: runs every static rule against every file, then
 * (optionally) the AI analysis, and derives counts, risk score and verdict
 * from the combined findings.
 *
 * Rule configuration is loaded from the rules table. A rule that has no row
 * there is treated as enabled with its catalog default severity; this default
 * applies uniformly to static rules, to the AI gate and to AI checkpoints.
 *
 * @param files Parsed skill files to scan.
 * @param useAi Whether the AI stage should be attempted. When false, no AI
 *   checkpoints are produced and `aiError` stays null.
 * @param onProgress Optional callback; awaited for the "ai-start" event and
 *   for every checkpoint right after it has been recorded.
 * @returns Findings, per-rule checkpoints, aggregate counts, risk score,
 *   verdict, and the AI status (`aiUsed` / `aiError`).
 */
export async function analyzeSkill(
  files: ParsedFile[],
  useAi: boolean,
  onProgress?: ProgressFn,
): Promise<EngineResult> {
  const dbRules = await db.select().from(rulesTable);
  // NOTE(review): the stored severity is trusted via a cast and not validated
  // here; confirm the table can only contain valid Severity values.
  const ruleConfig = new Map(
    dbRules.map((r) => [
      r.ruleId,
      { enabled: r.enabled, severity: r.severity as Severity },
    ]),
  );

  const findings: RawFinding[] = [];
  const checkpoints: ScanCheckpoint[] = [];

  // Stores a checkpoint and notifies the progress listener about it.
  const record = async (checkpoint: ScanCheckpoint): Promise<void> => {
    checkpoints.push(checkpoint);
    await onProgress?.({ type: "checkpoint", checkpoint });
  };

  // ---- Static rules -------------------------------------------------------
  for (const rule of STATIC_RULES) {
    const cfg = ruleConfig.get(rule.ruleId);
    const severity = cfg?.severity ?? rule.defaultSeverity;

    if (cfg && !cfg.enabled) {
      await record({
        id: rule.ruleId,
        label: rule.title,
        category: rule.category,
        axis: rule.axis,
        severity,
        status: "skipped",
        findingCount: 0,
        scoreDelta: 0,
        detectedBy: "static",
      });
      continue;
    }

    const ruleFindings: RawFinding[] = [];
    for (const file of files) {
      ruleFindings.push(...runStaticRule(rule, file, severity));
    }
    findings.push(...ruleFindings);

    await record({
      id: rule.ruleId,
      label: rule.title,
      category: rule.category,
      axis: rule.axis,
      severity,
      status: ruleFindings.length > 0 ? "flagged" : "pass",
      findingCount: ruleFindings.length,
      scoreDelta: scoreOf(ruleFindings),
      detectedBy: "static",
    });
  }

  // ---- AI analysis --------------------------------------------------------
  let aiUsed = false;
  let aiError: string | null = null;
  let aiFindings: RawFinding[] = [];

  if (useAi) {
    await onProgress?.({ type: "ai-start" });

    // AI rules that are not explicitly disabled, with their configured
    // (or default) severity. This list is handed to the AI analysis.
    const enabledAiRules: AiRuleConfig[] = AI_RULES.filter((rule) => {
      const cfg = ruleConfig.get(rule.ruleId);
      return cfg ? cfg.enabled : true;
    }).map((rule) => ({
      ruleId: rule.ruleId,
      title: rule.title,
      description: rule.description,
      axis: rule.axis as Axis,
      severity: ruleConfig.get(rule.ruleId)?.severity ?? rule.defaultSeverity,
    }));

    // NOTE(review): no ordering is specified, so with several enabled
    // providers the database decides which row is returned.
    const [provider] = await db
      .select()
      .from(aiProvidersTable)
      .where(eq(aiProvidersTable.enabled, true))
      .limit(1);

    // The gate is "is any AI rule left enabled". A rule without a DB row
    // counts as enabled here, exactly as it does for the checkpoints below;
    // requiring an enabled DB row would report the rules as deactivated on an
    // unseeded rules table even though nothing was disabled.
    if (enabledAiRules.length === 0) {
      aiError = "KI-Regeln sind im Regelwerk deaktiviert.";
    } else if (!provider) {
      aiError =
        "Kein aktiver KI-Provider konfiguriert. Bitte im Admin-Bereich einrichten.";
    } else if (!provider.apiToken) {
      aiError = `Für den Provider "${provider.name}" ist kein API-Token hinterlegt.`;
    } else {
      const prompts: Prompt[] = await db.select().from(promptsTable);
      const result = await runAiAnalysis(
        provider,
        prompts,
        files,
        enabledAiRules,
      );
      aiError = result.error;
      // Findings are only taken over when the run reported no error.
      if (!result.error) {
        aiUsed = true;
        aiFindings = result.findings;
        findings.push(...result.findings);
      }
    }

    // One checkpoint per AI rule: skipped when disabled, error when the AI
    // run did not succeed, otherwise flagged/pass based on its own findings.
    for (const rule of AI_RULES) {
      const cfg = ruleConfig.get(rule.ruleId);
      const severity = cfg?.severity ?? rule.defaultSeverity;
      const enabled = cfg ? cfg.enabled : true;

      let status: ScanCheckpoint["status"];
      let findingCount = 0;
      let scoreDelta = 0;

      if (!enabled) {
        status = "skipped";
      } else if (!aiUsed) {
        status = "error";
      } else {
        const ruleFindings = aiFindings.filter((f) => f.ruleId === rule.ruleId);
        findingCount = ruleFindings.length;
        scoreDelta = scoreOf(ruleFindings);
        status = findingCount > 0 ? "flagged" : "pass";
      }

      await record({
        id: rule.ruleId,
        label: rule.title,
        category: rule.category,
        axis: rule.axis,
        severity,
        status,
        findingCount,
        scoreDelta,
        detectedBy: "ai",
      });
    }
  }

  // ---- Aggregation --------------------------------------------------------
  const riskScore = computeScore(findings);
  const counts = computeCounts(findings);
  const verdict = computeVerdict(findings, riskScore);

  return {
    findings,
    counts,
    checkpoints,
    riskScore,
    verdict,
    aiUsed,
    aiError,
  };
}