161 lines
4 KiB
TypeScript
161 lines
4 KiB
TypeScript
|
|
import { unzipSync, strFromU8 } from "fflate";
|
||
|
|
import type { FileKind, ParsedFile } from "./ruleCatalog";
|
||
|
|
|
||
|
|
/**
 * Maps a file extension (lower-case, without the leading dot) to the language
 * label stored on a parsed file.
 *
 * The table is built on a prototype-less object: the extensions used for
 * lookup come from file names inside uploaded archives, and on a plain object
 * literal keys such as "constructor" or "__proto__" would resolve to members
 * inherited from `Object.prototype` instead of `undefined`.
 */
const LANG_BY_EXT: Record<string, string> = Object.assign(Object.create(null), {
  // Scripts
  sh: "shell",
  bash: "shell",
  zsh: "shell",
  py: "python",
  js: "javascript",
  mjs: "javascript",
  cjs: "javascript",
  ts: "typescript",
  rb: "ruby",
  pl: "perl",
  php: "php",
  ps1: "powershell",
  go: "go",
  rs: "rust",
  // Documents and configuration
  md: "markdown",
  txt: "text",
  json: "json",
  yaml: "yaml",
  yml: "yaml",
  toml: "toml",
  env: "dotenv",
});
|
||
|
|
|
||
|
|
/** Extensions (lower-case, no dot) that mark a file as an executable script. */
const SCRIPT_EXTS = new Set([
  // Shells
  "sh", "bash", "zsh",
  // Python
  "py",
  // JavaScript / TypeScript
  "js", "mjs", "cjs", "ts",
  // Other scripting languages
  "rb", "pl", "php", "ps1",
  // Compiled languages
  "go", "rs",
]);
|
||
|
|
|
||
|
|
/**
 * Lower-case directory markers (each with a trailing slash). Archive entries
 * whose lower-cased path contains one of them are left out of the result.
 */
const SKIP_DIRS = [
  "__macosx/",
  ".git/",
  "node_modules/",
];
|
||
|
|
|
||
|
|
/** Highest number of processed entries an archive may contain. */
const MAX_ZIP_FILES = 2000;
/** Upper bound, in bytes, for the summed uncompressed size of processed entries (60 MiB). */
const MAX_ZIP_TOTAL_BYTES = 60 * 1024 ** 2;
/** Upper bound, in bytes, for one uncompressed entry (5 MiB). */
const MAX_ZIP_FILE_BYTES = 5 * 1024 ** 2;
|
||
|
|
|
||
|
|
/**
 * Returns the lower-cased extension of the last "/"-separated segment of
 * `path`, without the dot, or "" when that segment contains no dot.
 */
function extOf(path: string): string {
  const fileName = path.slice(path.lastIndexOf("/") + 1);
  const lastDot = fileName.lastIndexOf(".");
  if (lastDot < 0) {
    return "";
  }
  return fileName.slice(lastDot + 1).toLowerCase();
}
|
||
|
|
|
||
|
|
/**
 * Assigns a file kind from the file name alone: a file named "skill.md"
 * (any casing) is an instruction, script extensions are scripts, other
 * ".md"/".txt" files are instructions, everything else is a resource.
 */
function classify(path: string): FileKind {
  const fileName = path.slice(path.lastIndexOf("/") + 1).toLowerCase();
  if (fileName === "skill.md") {
    return "instruction";
  }

  const extension = extOf(path);
  if (SCRIPT_EXTS.has(extension)) {
    return "script";
  }

  switch (extension) {
    case "md":
    case "txt":
      return "instruction";
    default:
      return "resource";
  }
}
|
||
|
|
|
||
|
|
/**
 * Heuristic binary check over at most the first 4000 bytes: any NUL byte means
 * binary; otherwise the data is binary when more than 30% of the sampled bytes
 * are control characters other than those in the range 9..13.
 */
function isProbablyBinary(bytes: Uint8Array): boolean {
  const sample = bytes.subarray(0, 4000);
  if (sample.length === 0) {
    return false;
  }

  let controlCount = 0;
  for (const byte of sample) {
    if (byte === 0) {
      return true;
    }
    const isWhitespaceControl = byte >= 9 && byte <= 13;
    if (byte < 32 && !isWhitespaceControl) {
      controlCount += 1;
    }
  }
  return controlCount / sample.length > 0.3;
}
|
||
|
|
|
||
|
|
/**
 * Reports whether a lower-cased, "/"-separated path lies inside one of the
 * SKIP_DIRS directories.
 */
function isInSkippedDir(lowerPath: string): boolean {
  // Anchor on a leading "/" so that only whole path segments match; a plain
  // substring test would also drop e.g. "payload.git/run.sh".
  const anchored = `/${lowerPath}`;
  return SKIP_DIRS.some((dir) => anchored.includes(`/${dir}`));
}

/**
 * Unpacks a ZIP archive and converts its entries into ParsedFile records.
 *
 * Directory entries, entries inside SKIP_DIRS, empty entries and entries
 * larger than MAX_ZIP_FILE_BYTES are left out. Entries detected as binary are
 * returned as "resource" with empty content; all others are decoded as text
 * and classified by file name.
 *
 * @param buffer Raw bytes of the ZIP archive.
 * @returns One record per accepted entry, in the order fflate yields them.
 * @throws Error when the accepted entries exceed MAX_ZIP_TOTAL_BYTES in total
 *   or number more than MAX_ZIP_FILES.
 */
export function parseZip(buffer: Buffer): ParsedFile[] {
  // First pass, before anything is inflated: apply the per-file and total
  // size limits to the sizes declared in the archive, so an oversized archive
  // is rejected without decompressing it.
  let declaredBytes = 0;
  const files = unzipSync(new Uint8Array(buffer), {
    filter: (entry) => {
      const lower = entry.name.replace(/\\/g, "/").toLowerCase();
      if (lower.endsWith("/") || isInSkippedDir(lower)) return false;
      if (entry.originalSize > MAX_ZIP_FILE_BYTES) return false;
      declaredBytes += entry.originalSize;
      if (declaredBytes > MAX_ZIP_TOTAL_BYTES) {
        throw new Error("ZIP-Archiv ist zu groß (entpackt).");
      }
      return true;
    },
  });

  // Second pass over the inflated data: the same checks against the actual
  // byte counts, in case the declared sizes were inaccurate.
  const result: ParsedFile[] = [];
  let totalBytes = 0;
  let processed = 0;
  for (const [rawPath, bytes] of Object.entries(files)) {
    const path = rawPath.replace(/\\/g, "/");
    if (path.endsWith("/")) continue;
    if (isInSkippedDir(path.toLowerCase())) continue;
    if (bytes.length === 0) continue;
    if (bytes.length > MAX_ZIP_FILE_BYTES) continue;

    totalBytes += bytes.length;
    if (totalBytes > MAX_ZIP_TOTAL_BYTES) {
      throw new Error("ZIP-Archiv ist zu groß (entpackt).");
    }
    processed += 1;
    if (processed > MAX_ZIP_FILES) {
      throw new Error("ZIP-Archiv enthält zu viele Dateien.");
    }

    if (isProbablyBinary(bytes)) {
      // Binary payloads are listed but their content is not kept.
      result.push({
        path,
        kind: "resource",
        language: null,
        content: "",
        size: bytes.length,
      });
      continue;
    }
    result.push({
      path,
      kind: classify(path),
      language: LANG_BY_EXT[extOf(path)] ?? null,
      content: strFromU8(bytes),
      size: bytes.length,
    });
  }
  return result;
}
|
||
|
|
|
||
|
|
/**
 * Builds a ParsedFile from one uploaded file.
 *
 * Only the last path segment of `filename` is kept (backslashes count as
 * separators). Content detected as binary yields a "resource" record with
 * empty content; otherwise the buffer is decoded as UTF-8 and the kind and
 * language are derived from the file name.
 *
 * @param filename Name of the file, possibly including directories.
 * @param buffer Raw file bytes.
 */
export function parseSingleFile(filename: string, buffer: Buffer): ParsedFile {
  const segments = filename.replace(/\\/g, "/").split("/");
  const path = segments.pop() ?? filename;
  const size = buffer.length;

  const binary = isProbablyBinary(new Uint8Array(buffer));
  if (!binary) {
    return {
      path,
      kind: classify(path),
      language: LANG_BY_EXT[extOf(path)] ?? null,
      content: buffer.toString("utf-8"),
      size,
    };
  }

  return { path, kind: "resource", language: null, content: "", size };
}
|
||
|
|
|
||
|
|
/**
 * Wraps pasted text as a markdown instruction file.
 *
 * @param text The text content.
 * @param name Path to record for it; an empty name becomes "SKILL.md".
 * @returns A record whose size is the UTF-8 byte length of `text`.
 */
export function parseText(text: string, name: string): ParsedFile {
  const path = name || "SKILL.md";
  const size = Buffer.byteLength(text, "utf-8");
  return { path, kind: "instruction", language: "markdown", content: text, size };
}
|
||
|
|
|
||
|
|
/**
 * Picks a display name for a scan, at most 120 characters long.
 *
 * Candidates, in order; the first non-empty one wins:
 *  1. the first `# heading` line of the first file named "skill.md" (any casing),
 *  2. the first `name: value` line of that file, with one surrounding quote
 *     stripped from each end,
 *  3. the first path segment of the first file,
 *  4. `fallback`.
 *
 * @param files Parsed files of the scan.
 * @param fallback Name to use when nothing can be derived from `files`.
 */
export function deriveScanName(files: ParsedFile[], fallback: string): string {
  const maxLength = 120;
  const skillMd = files.find(
    (f) => (f.path.split("/").pop() ?? "").toLowerCase() === "skill.md",
  );
  if (skillMd) {
    // `[^\S\r\n]` is "whitespace except line breaks": with a plain `\s` a bare
    // "#" line or an empty "name:" key would capture the NEXT line instead.
    const heading = skillMd.content.match(/^#[^\S\r\n]+(.+)$/m)?.[1]?.trim();
    if (heading) return heading.slice(0, maxLength);

    const declared = skillMd.content
      .match(/^name:[^\S\r\n]*(.+)$/im)?.[1]
      ?.trim()
      .replace(/^["']|["']$/g, "");
    // A blank or empty-quoted value falls through instead of naming the scan "".
    if (declared) return declared.slice(0, maxLength);
  }
  const top = files[0]?.path.split("/")[0];
  return (top || fallback).slice(0, maxLength);
}
|