skillguard/artifacts/api-server/src/lib/aiAnalysis.ts

220 lines
6.6 KiB
TypeScript
Raw Normal View History

import type { AiProvider, Prompt } from "@workspace/db";
import type { ParsedFile, RawFinding, Severity, Axis } from "./ruleCatalog";
const SEVERITIES: Severity[] = ["critical", "high", "medium", "low", "info"];
const AXES: Axis[] = ["security", "privacy"];
Task #2: Skill mit konfigurierter KI tatsächlich semantisch analysieren Verified the AI analysis end-to-end with a real provider and fixed two gaps found during the live run. Findings & fixes: - gpt-5 series (Replit AI Integrations modelfarm default) rejected the hardcoded `temperature: 0.1` with HTTP 400, silently disabling AI analysis. Removed the temperature param from the OpenAI-compatible request for broad model compatibility (aiAnalysis.ts). - Per-rule AI config (enable/disable/severity) was only a global on/off gate and AI findings weren't mapped to the AI rule IDs, so individual rule severity was ignored. runAiAnalysis now receives the enabled AI rules, instructs the model to classify each finding into one of those ruleIds, drops findings for disabled rules, and overrides severity/axis with the configured values (aiAnalysis.ts + scanEngine.ts). End-to-end verification (Replit OpenAI integration, gpt-5-mini provider): - "KI-Analyse aktivieren" produces AI findings mapped to AI-PROMPT-INJECTION, AI-MALICIOUS-INTENT, AI-DATA-PRIVACY. - Disabling AI-MALICIOUS-INTENT removed its finding; setting AI-PROMPT-INJECTION to critical was reflected in the result. - Wrong baseUrl and invalid token (real OpenAI endpoint) produce understandable aiError messages with no token leak. Side effects / notes: - Set up the Replit OpenAI AI Integration (env vars) and created one enabled provider row ("Replit OpenAI") so AI analysis works out of the box. Each AI-enabled scan bills the user's Replit credits. - Test scans created during verification were deleted. - artifacts/api-server typecheck passes. Replit-Task-Id: 7321caa4-5079-4db7-8ed2-4ccaa74fa577
2026-06-10 13:56:15 +00:00
export type AiRuleConfig = {
ruleId: string;
title: string;
description: string;
axis: Axis;
severity: Severity;
};
export type AiResult = {
findings: RawFinding[];
error: string | null;
};
const FETCH_TIMEOUT_MS = 60000;
function redactSecrets(text: string, token: string | null | undefined): string {
let out = text;
if (token && token.length >= 4) {
out = out.split(token).join("[REDACTED]");
}
out = out.replace(/(Bearer\s+)[A-Za-z0-9._\-]+/gi, "$1[REDACTED]");
out = out.replace(/\bsk-[A-Za-z0-9._\-]{8,}\b/g, "[REDACTED]");
return out;
}
async function fetchWithTimeout(
url: string,
init: RequestInit,
): Promise<Response> {
const controller = new AbortController();
const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
try {
return await fetch(url, { ...init, signal: controller.signal });
} finally {
clearTimeout(timer);
}
}
function buildSkillPayload(files: ParsedFile[]): string {
const parts: string[] = [];
let budget = 60000;
for (const f of files) {
if (f.content === "") continue;
const header = `\n===== DATEI: ${f.path} (${f.kind}) =====\n`;
const body = f.content.slice(0, 16000);
if (header.length + body.length > budget) {
parts.push(header + body.slice(0, Math.max(0, budget - header.length)));
break;
}
parts.push(header + body);
budget -= header.length + body.length;
}
return parts.join("\n");
}
Task #2: Skill mit konfigurierter KI tatsächlich semantisch analysieren Verified the AI analysis end-to-end with a real provider and fixed two gaps found during the live run. Findings & fixes: - gpt-5 series (Replit AI Integrations modelfarm default) rejected the hardcoded `temperature: 0.1` with HTTP 400, silently disabling AI analysis. Removed the temperature param from the OpenAI-compatible request for broad model compatibility (aiAnalysis.ts). - Per-rule AI config (enable/disable/severity) was only a global on/off gate and AI findings weren't mapped to the AI rule IDs, so individual rule severity was ignored. runAiAnalysis now receives the enabled AI rules, instructs the model to classify each finding into one of those ruleIds, drops findings for disabled rules, and overrides severity/axis with the configured values (aiAnalysis.ts + scanEngine.ts). End-to-end verification (Replit OpenAI integration, gpt-5-mini provider): - "KI-Analyse aktivieren" produces AI findings mapped to AI-PROMPT-INJECTION, AI-MALICIOUS-INTENT, AI-DATA-PRIVACY. - Disabling AI-MALICIOUS-INTENT removed its finding; setting AI-PROMPT-INJECTION to critical was reflected in the result. - Wrong baseUrl and invalid token (real OpenAI endpoint) produce understandable aiError messages with no token leak. Side effects / notes: - Set up the Replit OpenAI AI Integration (env vars) and created one enabled provider row ("Replit OpenAI") so AI analysis works out of the box. Each AI-enabled scan bills the user's Replit credits. - Test scans created during verification were deleted. - artifacts/api-server typecheck passes. Replit-Task-Id: 7321caa4-5079-4db7-8ed2-4ccaa74fa577
2026-06-10 13:56:15 +00:00
function coerceFinding(
raw: unknown,
allowed: Map<string, AiRuleConfig>,
): RawFinding | null {
if (!raw || typeof raw !== "object") return null;
const o = raw as Record<string, unknown>;
const title = typeof o.title === "string" ? o.title.slice(0, 200) : null;
if (!title) return null;
Task #2: Skill mit konfigurierter KI tatsächlich semantisch analysieren Verified the AI analysis end-to-end with a real provider and fixed two gaps found during the live run. Findings & fixes: - gpt-5 series (Replit AI Integrations modelfarm default) rejected the hardcoded `temperature: 0.1` with HTTP 400, silently disabling AI analysis. Removed the temperature param from the OpenAI-compatible request for broad model compatibility (aiAnalysis.ts). - Per-rule AI config (enable/disable/severity) was only a global on/off gate and AI findings weren't mapped to the AI rule IDs, so individual rule severity was ignored. runAiAnalysis now receives the enabled AI rules, instructs the model to classify each finding into one of those ruleIds, drops findings for disabled rules, and overrides severity/axis with the configured values (aiAnalysis.ts + scanEngine.ts). End-to-end verification (Replit OpenAI integration, gpt-5-mini provider): - "KI-Analyse aktivieren" produces AI findings mapped to AI-PROMPT-INJECTION, AI-MALICIOUS-INTENT, AI-DATA-PRIVACY. - Disabling AI-MALICIOUS-INTENT removed its finding; setting AI-PROMPT-INJECTION to critical was reflected in the result. - Wrong baseUrl and invalid token (real OpenAI endpoint) produce understandable aiError messages with no token leak. Side effects / notes: - Set up the Replit OpenAI AI Integration (env vars) and created one enabled provider row ("Replit OpenAI") so AI analysis works out of the box. Each AI-enabled scan bills the user's Replit credits. - Test scans created during verification were deleted. - artifacts/api-server typecheck passes. Replit-Task-Id: 7321caa4-5079-4db7-8ed2-4ccaa74fa577
2026-06-10 13:56:15 +00:00
const rule = typeof o.ruleId === "string" ? allowed.get(o.ruleId) : undefined;
if (!rule) {
return null;
}
return {
Task #2: Skill mit konfigurierter KI tatsächlich semantisch analysieren Verified the AI analysis end-to-end with a real provider and fixed two gaps found during the live run. Findings & fixes: - gpt-5 series (Replit AI Integrations modelfarm default) rejected the hardcoded `temperature: 0.1` with HTTP 400, silently disabling AI analysis. Removed the temperature param from the OpenAI-compatible request for broad model compatibility (aiAnalysis.ts). - Per-rule AI config (enable/disable/severity) was only a global on/off gate and AI findings weren't mapped to the AI rule IDs, so individual rule severity was ignored. runAiAnalysis now receives the enabled AI rules, instructs the model to classify each finding into one of those ruleIds, drops findings for disabled rules, and overrides severity/axis with the configured values (aiAnalysis.ts + scanEngine.ts). End-to-end verification (Replit OpenAI integration, gpt-5-mini provider): - "KI-Analyse aktivieren" produces AI findings mapped to AI-PROMPT-INJECTION, AI-MALICIOUS-INTENT, AI-DATA-PRIVACY. - Disabling AI-MALICIOUS-INTENT removed its finding; setting AI-PROMPT-INJECTION to critical was reflected in the result. - Wrong baseUrl and invalid token (real OpenAI endpoint) produce understandable aiError messages with no token leak. Side effects / notes: - Set up the Replit OpenAI AI Integration (env vars) and created one enabled provider row ("Replit OpenAI") so AI analysis works out of the box. Each AI-enabled scan bills the user's Replit credits. - Test scans created during verification were deleted. - artifacts/api-server typecheck passes. Replit-Task-Id: 7321caa4-5079-4db7-8ed2-4ccaa74fa577
2026-06-10 13:56:15 +00:00
ruleId: rule.ruleId,
axis: rule.axis,
severity: rule.severity,
title,
description:
typeof o.description === "string" ? o.description.slice(0, 2000) : title,
remediation:
typeof o.remediation === "string" ? o.remediation.slice(0, 2000) : null,
file: typeof o.file === "string" ? o.file.slice(0, 400) : null,
line: typeof o.line === "number" ? o.line : null,
snippet: typeof o.snippet === "string" ? o.snippet.slice(0, 400) : null,
detectedBy: "ai",
};
}
function extractJson(text: string): unknown {
const fence = text.match(/```(?:json)?\s*([\s\S]*?)```/i);
const candidate = fence ? fence[1] : text;
const start = candidate.indexOf("{");
const end = candidate.lastIndexOf("}");
if (start === -1 || end === -1 || end <= start) {
throw new Error("Keine JSON-Antwort von der KI erhalten.");
}
return JSON.parse(candidate.slice(start, end + 1));
}
async function callOpenAiCompatible(
provider: AiProvider,
system: string,
user: string,
): Promise<string> {
const base = provider.baseUrl.replace(/\/$/, "");
const url = `${base}/chat/completions`;
const res = await fetchWithTimeout(url, {
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${provider.apiToken ?? ""}`,
},
body: JSON.stringify({
model: provider.model,
messages: [
{ role: "system", content: system },
{ role: "user", content: user },
],
}),
});
if (!res.ok) {
const body = await res.text();
throw new Error(
`HTTP ${res.status}: ${redactSecrets(body.slice(0, 300), provider.apiToken)}`,
);
}
const data = (await res.json()) as {
choices?: { message?: { content?: string } }[];
};
return data.choices?.[0]?.message?.content ?? "";
}
async function callAnthropic(
provider: AiProvider,
system: string,
user: string,
): Promise<string> {
const base = provider.baseUrl.replace(/\/$/, "");
const url = `${base}/messages`;
const res = await fetchWithTimeout(url, {
method: "POST",
headers: {
"Content-Type": "application/json",
"x-api-key": provider.apiToken ?? "",
"anthropic-version": "2023-06-01",
},
body: JSON.stringify({
model: provider.model,
max_tokens: 4096,
system,
messages: [{ role: "user", content: user }],
}),
});
if (!res.ok) {
const body = await res.text();
throw new Error(
`HTTP ${res.status}: ${redactSecrets(body.slice(0, 300), provider.apiToken)}`,
);
}
const data = (await res.json()) as { content?: { text?: string }[] };
return data.content?.[0]?.text ?? "";
}
export async function callProvider(
provider: AiProvider,
system: string,
user: string,
): Promise<string> {
if (provider.apiType === "anthropic") {
return callAnthropic(provider, system, user);
}
return callOpenAiCompatible(provider, system, user);
}
Task #2: Skill mit konfigurierter KI tatsächlich semantisch analysieren Verified the AI analysis end-to-end with a real provider and fixed two gaps found during the live run. Findings & fixes: - gpt-5 series (Replit AI Integrations modelfarm default) rejected the hardcoded `temperature: 0.1` with HTTP 400, silently disabling AI analysis. Removed the temperature param from the OpenAI-compatible request for broad model compatibility (aiAnalysis.ts). - Per-rule AI config (enable/disable/severity) was only a global on/off gate and AI findings weren't mapped to the AI rule IDs, so individual rule severity was ignored. runAiAnalysis now receives the enabled AI rules, instructs the model to classify each finding into one of those ruleIds, drops findings for disabled rules, and overrides severity/axis with the configured values (aiAnalysis.ts + scanEngine.ts). End-to-end verification (Replit OpenAI integration, gpt-5-mini provider): - "KI-Analyse aktivieren" produces AI findings mapped to AI-PROMPT-INJECTION, AI-MALICIOUS-INTENT, AI-DATA-PRIVACY. - Disabling AI-MALICIOUS-INTENT removed its finding; setting AI-PROMPT-INJECTION to critical was reflected in the result. - Wrong baseUrl and invalid token (real OpenAI endpoint) produce understandable aiError messages with no token leak. Side effects / notes: - Set up the Replit OpenAI AI Integration (env vars) and created one enabled provider row ("Replit OpenAI") so AI analysis works out of the box. Each AI-enabled scan bills the user's Replit credits. - Test scans created during verification were deleted. - artifacts/api-server typecheck passes. Replit-Task-Id: 7321caa4-5079-4db7-8ed2-4ccaa74fa577
2026-06-10 13:56:15 +00:00
function buildRuleMenu(aiRules: AiRuleConfig[]): string {
const lines = aiRules.map(
(r) => `- ${r.ruleId} (${r.axis}): ${r.title}${r.description}`,
);
return [
"",
"Ordne jeden Befund GENAU EINER der folgenden aktiven Kategorien zu und gib deren Kennung im Pflichtfeld \"ruleId\" zurück. Verwende ausschließlich diese Kennungen:",
...lines,
'Befunde, die zu keiner dieser Kategorien passen, lasse weg. Das Feld "severity" wird serverseitig festgelegt und kann von dir ignoriert werden.',
].join("\n");
}
export async function runAiAnalysis(
provider: AiProvider,
prompts: Prompt[],
files: ParsedFile[],
Task #2: Skill mit konfigurierter KI tatsächlich semantisch analysieren Verified the AI analysis end-to-end with a real provider and fixed two gaps found during the live run. Findings & fixes: - gpt-5 series (Replit AI Integrations modelfarm default) rejected the hardcoded `temperature: 0.1` with HTTP 400, silently disabling AI analysis. Removed the temperature param from the OpenAI-compatible request for broad model compatibility (aiAnalysis.ts). - Per-rule AI config (enable/disable/severity) was only a global on/off gate and AI findings weren't mapped to the AI rule IDs, so individual rule severity was ignored. runAiAnalysis now receives the enabled AI rules, instructs the model to classify each finding into one of those ruleIds, drops findings for disabled rules, and overrides severity/axis with the configured values (aiAnalysis.ts + scanEngine.ts). End-to-end verification (Replit OpenAI integration, gpt-5-mini provider): - "KI-Analyse aktivieren" produces AI findings mapped to AI-PROMPT-INJECTION, AI-MALICIOUS-INTENT, AI-DATA-PRIVACY. - Disabling AI-MALICIOUS-INTENT removed its finding; setting AI-PROMPT-INJECTION to critical was reflected in the result. - Wrong baseUrl and invalid token (real OpenAI endpoint) produce understandable aiError messages with no token leak. Side effects / notes: - Set up the Replit OpenAI AI Integration (env vars) and created one enabled provider row ("Replit OpenAI") so AI analysis works out of the box. Each AI-enabled scan bills the user's Replit credits. - Test scans created during verification were deleted. - artifacts/api-server typecheck passes. Replit-Task-Id: 7321caa4-5079-4db7-8ed2-4ccaa74fa577
2026-06-10 13:56:15 +00:00
aiRules: AiRuleConfig[],
): Promise<AiResult> {
Task #2: Skill mit konfigurierter KI tatsächlich semantisch analysieren Verified the AI analysis end-to-end with a real provider and fixed two gaps found during the live run. Findings & fixes: - gpt-5 series (Replit AI Integrations modelfarm default) rejected the hardcoded `temperature: 0.1` with HTTP 400, silently disabling AI analysis. Removed the temperature param from the OpenAI-compatible request for broad model compatibility (aiAnalysis.ts). - Per-rule AI config (enable/disable/severity) was only a global on/off gate and AI findings weren't mapped to the AI rule IDs, so individual rule severity was ignored. runAiAnalysis now receives the enabled AI rules, instructs the model to classify each finding into one of those ruleIds, drops findings for disabled rules, and overrides severity/axis with the configured values (aiAnalysis.ts + scanEngine.ts). End-to-end verification (Replit OpenAI integration, gpt-5-mini provider): - "KI-Analyse aktivieren" produces AI findings mapped to AI-PROMPT-INJECTION, AI-MALICIOUS-INTENT, AI-DATA-PRIVACY. - Disabling AI-MALICIOUS-INTENT removed its finding; setting AI-PROMPT-INJECTION to critical was reflected in the result. - Wrong baseUrl and invalid token (real OpenAI endpoint) produce understandable aiError messages with no token leak. Side effects / notes: - Set up the Replit OpenAI AI Integration (env vars) and created one enabled provider row ("Replit OpenAI") so AI analysis works out of the box. Each AI-enabled scan bills the user's Replit credits. - Test scans created during verification were deleted. - artifacts/api-server typecheck passes. Replit-Task-Id: 7321caa4-5079-4db7-8ed2-4ccaa74fa577
2026-06-10 13:56:15 +00:00
if (aiRules.length === 0) {
return { findings: [], error: null };
}
const allowed = new Map(aiRules.map((r) => [r.ruleId, r]));
const systemPrompt = prompts.find((p) => p.key === "system")?.content ?? "";
const analysisPrompt =
prompts.find((p) => p.key === "analysis")?.content ?? "";
const payload = buildSkillPayload(files);
Task #2: Skill mit konfigurierter KI tatsächlich semantisch analysieren Verified the AI analysis end-to-end with a real provider and fixed two gaps found during the live run. Findings & fixes: - gpt-5 series (Replit AI Integrations modelfarm default) rejected the hardcoded `temperature: 0.1` with HTTP 400, silently disabling AI analysis. Removed the temperature param from the OpenAI-compatible request for broad model compatibility (aiAnalysis.ts). - Per-rule AI config (enable/disable/severity) was only a global on/off gate and AI findings weren't mapped to the AI rule IDs, so individual rule severity was ignored. runAiAnalysis now receives the enabled AI rules, instructs the model to classify each finding into one of those ruleIds, drops findings for disabled rules, and overrides severity/axis with the configured values (aiAnalysis.ts + scanEngine.ts). End-to-end verification (Replit OpenAI integration, gpt-5-mini provider): - "KI-Analyse aktivieren" produces AI findings mapped to AI-PROMPT-INJECTION, AI-MALICIOUS-INTENT, AI-DATA-PRIVACY. - Disabling AI-MALICIOUS-INTENT removed its finding; setting AI-PROMPT-INJECTION to critical was reflected in the result. - Wrong baseUrl and invalid token (real OpenAI endpoint) produce understandable aiError messages with no token leak. Side effects / notes: - Set up the Replit OpenAI AI Integration (env vars) and created one enabled provider row ("Replit OpenAI") so AI analysis works out of the box. Each AI-enabled scan bills the user's Replit credits. - Test scans created during verification were deleted. - artifacts/api-server typecheck passes. Replit-Task-Id: 7321caa4-5079-4db7-8ed2-4ccaa74fa577
2026-06-10 13:56:15 +00:00
const user = `${analysisPrompt}\n${buildRuleMenu(aiRules)}\n\nHier ist das zu prüfende Skill:\n${payload}`;
try {
const content = await callProvider(provider, systemPrompt, user);
const parsed = extractJson(content) as { findings?: unknown[] };
const findingsRaw = Array.isArray(parsed.findings) ? parsed.findings : [];
const findings = findingsRaw
Task #2: Skill mit konfigurierter KI tatsächlich semantisch analysieren Verified the AI analysis end-to-end with a real provider and fixed two gaps found during the live run. Findings & fixes: - gpt-5 series (Replit AI Integrations modelfarm default) rejected the hardcoded `temperature: 0.1` with HTTP 400, silently disabling AI analysis. Removed the temperature param from the OpenAI-compatible request for broad model compatibility (aiAnalysis.ts). - Per-rule AI config (enable/disable/severity) was only a global on/off gate and AI findings weren't mapped to the AI rule IDs, so individual rule severity was ignored. runAiAnalysis now receives the enabled AI rules, instructs the model to classify each finding into one of those ruleIds, drops findings for disabled rules, and overrides severity/axis with the configured values (aiAnalysis.ts + scanEngine.ts). End-to-end verification (Replit OpenAI integration, gpt-5-mini provider): - "KI-Analyse aktivieren" produces AI findings mapped to AI-PROMPT-INJECTION, AI-MALICIOUS-INTENT, AI-DATA-PRIVACY. - Disabling AI-MALICIOUS-INTENT removed its finding; setting AI-PROMPT-INJECTION to critical was reflected in the result. - Wrong baseUrl and invalid token (real OpenAI endpoint) produce understandable aiError messages with no token leak. Side effects / notes: - Set up the Replit OpenAI AI Integration (env vars) and created one enabled provider row ("Replit OpenAI") so AI analysis works out of the box. Each AI-enabled scan bills the user's Replit credits. - Test scans created during verification were deleted. - artifacts/api-server typecheck passes. Replit-Task-Id: 7321caa4-5079-4db7-8ed2-4ccaa74fa577
2026-06-10 13:56:15 +00:00
.map((f) => coerceFinding(f, allowed))
.filter((f): f is RawFinding => f !== null);
return { findings, error: null };
} catch (err) {
return {
findings: [],
error: err instanceof Error ? err.message : "Unbekannter KI-Fehler",
};
}
}