Files
BoostAI/FineTune/server.mjs
2026-05-26 13:43:09 +01:00

802 lines
24 KiB
JavaScript

import express from "express";
import { createServer } from "node:http";
import crypto from "node:crypto";
import path from "node:path";
import { fileURLToPath, URL } from "node:url";
import { WebSocketServer } from "ws";
// Resolve this module's own directory; ES modules do not provide __dirname.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const app = express();
// Bind address and port come from the environment, with local defaults.
const host = process.env.HOST || "0.0.0.0";
const port = Number(process.env.PORT || 4310);
// A plain HTTP server wraps the Express app so its "upgrade" event can be handled manually.
const server = createServer(app);
// noServer: the WebSocket server does not listen itself; sockets are handed to it via handleUpgrade.
const websocketServer = new WebSocketServer({ noServer: true });
// In-memory registry of collaboration workspaces, keyed by workspace id.
const sharedWorkspaces = new Map();
// Parse JSON request bodies (2mb limit).
app.use(express.json({ limit: "2mb" }));
// Serve static files from the "public" directory next to this module.
app.use(express.static(path.join(__dirname, "public")));
// Liveness probe: always answers with { ok: true }.
app.get("/health", (_req, res) => res.json({ ok: true }));
// Reports which integrations are configured plus the endpoint, model and backend URL.
// The API key and backend token are never included in the response.
app.get("/api/config", (_req, res) => {
  const env = process.env;
  const config = {
    hasAiConfig: hasAiConfig(),
    endpoint: env.FINE_TUNE_AI_ENDPOINT || "",
    model: env.FINE_TUNE_AI_MODEL || "",
    hasQuestionGeneratorConfig: hasQuestionGeneratorConfig(),
    backendUrl: env.FINE_TUNE_BACKEND_URL || "",
  };
  res.json(config);
});
// Validates the generator request and relays the question generator's response unchanged.
app.post("/api/questions/generate", async (req, res) => {
  try {
    assertQuestionGeneratorConfig();
    const generatorInput = sanitizeGeneratorInput(req.body);
    assertGeneratorInput(generatorInput);
    res.json(await callQuestionGenerator(generatorInput));
  } catch (error) {
    handleError(res, error);
  }
});
// Generates questions and reshapes them into the assignment question format.
app.post("/api/assignment/generate", async (req, res) => {
  try {
    assertQuestionGeneratorConfig();
    const generatorInput = sanitizeGeneratorInput(req.body);
    assertGeneratorInput(generatorInput);
    const upstream = await callQuestionGenerator(generatorInput);
    const seed = upstream.seed ?? null;
    // Fall back to the requested count when the generator does not report one.
    const count = upstream.count ?? generatorInput.count;
    const questions = Array.isArray(upstream.data) ? upstream.data.map(mapGeneratedQuestion) : [];
    res.json({ seed, count, questions });
  } catch (error) {
    handleError(res, error);
  }
});
// Asks the AI endpoint to draft a student submission for every question in the assignment.
app.post("/api/assignment/student-draft", async (req, res) => {
  try {
    assertAiConfig();
    const assignment = sanitizeAssignmentInput(req.body);
    assertAssignmentForStudentDraft(assignment);

    // JSON schema for a single drafted answer.
    const questionDraftSchema = {
      type: "object",
      additionalProperties: false,
      properties: {
        questionId: { type: "integer" },
        answerText: { type: "string" },
        workingSteps: { type: "string" },
        solveMode: { type: "string" },
      },
      required: ["questionId", "answerText", "workingSteps", "solveMode"],
    };
    // JSON schema for the whole response: a list of drafted answers.
    const responseSchema = {
      type: "object",
      additionalProperties: false,
      properties: {
        questions: {
          type: "array",
          items: questionDraftSchema,
        },
      },
      required: ["questions"],
    };

    const aiResult = await callAiJson({
      schemaName: "fine_tune_assignment_student_draft",
      schema: responseSchema,
      systemPrompt:
        "You are helping create high-quality fine-tuning data for assignment review. Generate realistic student submissions for every question in the assignment. The work should sound like one student completed the whole assignment. Some answers may be correct, partially correct, or incorrect, but they should stay plausible and classroom-realistic. Return only the requested JSON.",
      userPrompt: buildStudentDraftPrompt(assignment),
    });

    const questions = normalizeStudentDraftQuestions(assignment.questions, aiResult.questions);
    res.json({ questions });
  } catch (error) {
    handleError(res, error);
  }
});
// Asks the AI endpoint to draft a teacher review for every question plus an assignment-level summary.
app.post("/api/assignment/teacher-draft", async (req, res) => {
  try {
    assertAiConfig();
    const assignment = sanitizeAssignmentInput(req.body);
    assertAssignmentForTeacherDraft(assignment);

    // JSON schema for a single question review.
    const questionReviewSchema = {
      type: "object",
      additionalProperties: false,
      properties: {
        questionId: { type: "integer" },
        aiFeedback: { type: "string" },
        understandingScore: { type: "number" },
        confidence: { type: "number" },
        needsAttention: { type: "boolean" },
        issueReason: { type: "string" },
      },
      required: [
        "questionId",
        "aiFeedback",
        "understandingScore",
        "confidence",
        "needsAttention",
        "issueReason",
      ],
    };
    // JSON schema for the whole response: per-question reviews plus two summary strings.
    const responseSchema = {
      type: "object",
      additionalProperties: false,
      properties: {
        questions: {
          type: "array",
          items: questionReviewSchema,
        },
        assignmentSummary: { type: "string" },
        recommendedNextStep: { type: "string" },
      },
      required: ["questions", "assignmentSummary", "recommendedNextStep"],
    };

    const aiResult = await callAiJson({
      schemaName: "fine_tune_assignment_teacher_draft",
      schema: responseSchema,
      systemPrompt:
        "You are helping create fine-tuning labels for a teacher review system. Review the full assignment in one pass. Return one question-level review for every question using the exact backend-aligned fields questionId, aiFeedback, understandingScore, confidence, needsAttention, issueReason, plus assignmentSummary and recommendedNextStep for the whole assignment. Be strict but fair. Focus on conceptual understanding, not just final correctness. Return only the requested JSON.",
      userPrompt: buildTeacherDraftPrompt(assignment),
    });

    const questions = normalizeTeacherDraftQuestions(assignment.questions, aiResult.questions);
    const assignmentSummary = stringOrEmpty(aiResult.assignmentSummary);
    const recommendedNextStep = stringOrEmpty(aiResult.recommendedNextStep);
    res.json({ questions, assignmentSummary, recommendedNextStep });
  } catch (error) {
    handleError(res, error);
  }
});
// Fallback for any GET not matched above: respond with public/index.html.
app.get("*", (_req, res) => {
  const indexFile = path.join(__dirname, "public", "index.html");
  res.sendFile(indexFile);
});
// Handles HTTP upgrade requests: only the "/ws" path is handed to the WebSocket server;
// every other (or unparseable) request has its socket destroyed.
server.on("upgrade", (req, socket, head) => {
  let url;
  try {
    // The Host header and request target are client-controlled. `new URL` throws on
    // malformed values, and an uncaught exception in this listener would take down the process.
    url = new URL(req.url || "/", `http://${req.headers.host || "localhost"}`);
  } catch {
    socket.destroy();
    return;
  }
  if (url.pathname !== "/ws") {
    socket.destroy();
    return;
  }
  websocketServer.handleUpgrade(req, socket, head, (ws) => {
    // Pass the parsed URL along so the connection handler can read the query string.
    websocketServer.emit("connection", ws, req, url);
  });
});
// Registers a new collaboration client: joins it to a workspace, sends the current state,
// relays "workspace:update" messages to the workspace, and cleans up when the socket closes.
websocketServer.on("connection", (ws, _req, url) => {
  const workspaceId = sanitizeWorkspaceId(url.searchParams.get("workspace"));
  const clientId = crypto.randomUUID();
  const workspace = getSharedWorkspace(workspaceId);
  workspace.clients.set(clientId, ws);
  sendSocketMessage(ws, {
    type: "workspace:init",
    workspaceId,
    clientId,
    version: workspace.version,
    state: cloneJson(workspace.state),
    presenceCount: workspace.clients.size,
    updatedAt: workspace.updatedAt,
  });
  broadcastWorkspacePresence(workspaceId);
  ws.on("message", (raw) => {
    let message;
    try {
      message = JSON.parse(String(raw));
    } catch {
      sendSocketMessage(ws, { type: "workspace:error", message: "Invalid collaboration payload." });
      return;
    }
    if (message?.type !== "workspace:update") {
      sendSocketMessage(ws, { type: "workspace:error", message: "Unsupported collaboration message." });
      return;
    }
    if (!isPlainObject(message.state)) {
      sendSocketMessage(ws, { type: "workspace:error", message: "Workspace state must be an object." });
      return;
    }
    // Last write wins: the incoming state replaces the stored one and bumps the version.
    workspace.version += 1;
    workspace.state = cloneJson(message.state);
    workspace.updatedAt = new Date().toISOString();
    broadcastWorkspaceSnapshot(workspaceId, clientId);
  });
  ws.on("close", () => {
    workspace.clients.delete(clientId);
    // Workspace ids are chosen by clients, so a workspace that never received any state
    // is dropped once its last client leaves; otherwise the registry would grow forever.
    // Workspaces that hold state are kept so later clients can pick it up.
    const isIdle = workspace.clients.size === 0 && workspace.state == null;
    if (isIdle && sharedWorkspaces.get(workspaceId) === workspace) {
      sharedWorkspaces.delete(workspaceId);
      // Nobody is left to notify, and broadcasting would re-create the entry.
      return;
    }
    broadcastWorkspacePresence(workspaceId);
  });
  // Deliberately a no-op: registering a listener keeps a socket "error" event from being unhandled.
  ws.on("error", () => {});
});
// Start accepting connections and log the bound address once listening.
server.listen(port, host, function onListening() {
  console.log(`FineTune helper listening on http://${host}:${port}`);
});
/**
 * Returns the workspace record for an id, creating an empty one on first use.
 * @param {string} workspaceId - Key into the shared workspace registry.
 * @returns {{version: number, updatedAt: (string|null), state: (object|null), clients: Map}} The workspace record.
 */
function getSharedWorkspace(workspaceId) {
  if (sharedWorkspaces.has(workspaceId)) {
    return sharedWorkspaces.get(workspaceId);
  }
  const created = {
    version: 0,
    updatedAt: null,
    state: null,
    clients: new Map(),
  };
  sharedWorkspaces.set(workspaceId, created);
  return created;
}
/**
 * Sends the workspace's current state to every client in it.
 * @param {string} workspaceId - Workspace to broadcast to.
 * @param {string} actorClientId - Id of the client whose update triggered the snapshot.
 */
function broadcastWorkspaceSnapshot(workspaceId, actorClientId) {
  const current = getSharedWorkspace(workspaceId);
  const snapshot = {
    type: "workspace:snapshot",
    workspaceId,
    version: current.version,
    state: cloneJson(current.state),
    actorClientId,
    presenceCount: current.clients.size,
    updatedAt: current.updatedAt,
  };
  broadcastWorkspaceMessage(workspaceId, snapshot);
}
/**
 * Tells every client in a workspace how many clients are currently connected.
 * @param {string} workspaceId - Workspace to broadcast to.
 */
function broadcastWorkspacePresence(workspaceId) {
  const current = getSharedWorkspace(workspaceId);
  const presence = {
    type: "workspace:presence",
    workspaceId,
    presenceCount: current.clients.size,
    version: current.version,
  };
  broadcastWorkspaceMessage(workspaceId, presence);
}
/**
 * Delivers one payload to every client socket registered in a workspace.
 * @param {string} workspaceId - Workspace whose clients receive the payload.
 * @param {object} payload - Message to send.
 */
function broadcastWorkspaceMessage(workspaceId, payload) {
  const { clients } = getSharedWorkspace(workspaceId);
  clients.forEach((socket) => {
    sendSocketMessage(socket, payload);
  });
}
/**
 * Sends a JSON-serialized payload over a socket, skipping sockets whose readyState is not 1.
 * @param {{readyState: number, send: function(string): void}} ws - Target socket.
 * @param {*} payload - Value to serialize and send.
 */
function sendSocketMessage(ws, payload) {
  const OPEN_STATE = 1;
  if (ws.readyState === OPEN_STATE) {
    const serialized = JSON.stringify(payload);
    ws.send(serialized);
  }
}
/**
 * Normalizes a client-supplied workspace id.
 * @param {*} value - Raw id; falsy values select the default workspace.
 * @returns {string} The trimmed id when it is 1-64 characters of letters, digits, ".", "_" or "-"; otherwise "shared".
 */
function sanitizeWorkspaceId(value) {
  const FALLBACK_ID = "shared";
  const candidate = String(value || FALLBACK_ID).trim();
  if (candidate === "") {
    return FALLBACK_ID;
  }
  if (/^[a-zA-Z0-9._-]{1,64}$/.test(candidate)) {
    return candidate;
  }
  return FALLBACK_ID;
}
/**
 * Tells whether a value is a non-null object that is not an array.
 * @param {*} value - Value to inspect.
 * @returns {boolean} True for non-array objects, false otherwise.
 */
function isPlainObject(value) {
  if (value === null || typeof value !== "object") {
    return false;
  }
  return !Array.isArray(value);
}
/**
 * Deep-copies a value by round-tripping it through JSON.
 * @param {*} value - JSON-serializable value.
 * @returns {*} An independent copy, or null when the input is null or undefined.
 */
function cloneJson(value) {
  if (value === null || value === undefined) {
    return null;
  }
  const serialized = JSON.stringify(value);
  return JSON.parse(serialized);
}
/**
 * Reports whether the AI endpoint, API key and model are all set in the environment.
 * @returns {boolean} True only when all three variables are non-empty.
 */
function hasAiConfig() {
  const requiredNames = ["FINE_TUNE_AI_ENDPOINT", "FINE_TUNE_AI_API_KEY", "FINE_TUNE_AI_MODEL"];
  return requiredNames.every((name) => Boolean(process.env[name]));
}
/**
 * Reports whether the question generator backend URL and token are both set in the environment.
 * @returns {boolean} True only when both variables are non-empty.
 */
function hasQuestionGeneratorConfig() {
  const requiredNames = ["FINE_TUNE_BACKEND_URL", "FINE_TUNE_BACKEND_TOKEN"];
  return requiredNames.every((name) => Boolean(process.env[name]));
}
/**
 * Guards AI-backed routes.
 * @throws {Error} With status 503 when the AI configuration is incomplete.
 */
function assertAiConfig() {
  if (hasAiConfig()) {
    return;
  }
  const failure = new Error("AI config is missing. Set FINE_TUNE_AI_ENDPOINT, FINE_TUNE_AI_API_KEY, and FINE_TUNE_AI_MODEL.");
  failure.status = 503;
  throw failure;
}
/**
 * Guards routes that depend on the question generator backend.
 * @throws {Error} With status 503 when the backend configuration is incomplete.
 */
function assertQuestionGeneratorConfig() {
  if (hasQuestionGeneratorConfig()) {
    return;
  }
  const failure = new Error(
    "Question generator config is missing. Set FINE_TUNE_BACKEND_URL and FINE_TUNE_BACKEND_TOKEN.",
  );
  failure.status = 503;
  throw failure;
}
/**
 * Extracts and normalizes the question generator request fields.
 * @param {object} [payload={}] - Raw request body.
 * @returns {{topic: string, difficulty: string, count: number}} Normalized input; count defaults to 1.
 */
function sanitizeGeneratorInput(payload = {}) {
  const topic = stringOrEmpty(payload.topic);
  const difficulty = stringOrEmpty(payload.difficulty);
  const count = integerOrDefault(payload.count, 1);
  return { topic, difficulty, count };
}
/**
 * Extracts and normalizes an assignment request body, including each of its questions.
 * @param {object} [payload={}] - Raw request body.
 * @returns {object} Normalized assignment; a non-array `questions` becomes an empty list.
 */
function sanitizeAssignmentInput(payload = {}) {
  const questionPayloads = Array.isArray(payload.questions) ? payload.questions : [];
  const assignment = {
    assignmentId: stringOrEmpty(payload.assignmentId),
    studentId: stringOrEmpty(payload.studentId),
    assignmentTitle: stringOrEmpty(payload.assignmentTitle),
    instructions: stringOrEmpty(payload.instructions),
    passThreshold: numberOrNull(payload.passThreshold),
    topic: stringOrEmpty(payload.topic),
    difficulty: stringOrEmpty(payload.difficulty),
  };
  // The array index becomes the fallback position for each question.
  assignment.questions = questionPayloads.map((entry, entryIndex) => sanitizeQuestionInput(entry, entryIndex));
  return assignment;
}
/**
 * Extracts and normalizes one question from an assignment request.
 * @param {object} [payload={}] - Raw question entry. Non-object entries (including null) are
 *   treated as empty so that validation, not a TypeError, rejects them.
 * @param {number} [index=0] - Zero-based array index, used for the fallback position.
 * @returns {object} Normalized question with every expected field present.
 */
function sanitizeQuestionInput(payload = {}, index = 0) {
  // Default parameters only cover `undefined`; a `null` array entry would otherwise throw
  // on the first property read and surface as a 500 instead of a 400 validation error.
  const source = payload !== null && typeof payload === "object" ? payload : {};
  return {
    questionId: integerOrNull(source.questionId),
    position: integerOrDefault(source.position, index + 1),
    title: stringOrEmpty(source.title),
    prompt: stringOrEmpty(source.prompt),
    subject: stringOrEmpty(source.subject),
    source: stringOrEmpty(source.source),
    difficulty: stringOrEmpty(source.difficulty),
    correctAnswer: stringOrEmpty(source.correctAnswer),
    workedSolution: stringOrEmpty(source.workedSolution),
    tags: stringArray(source.tags),
    studentAnswer: stringOrEmpty(source.studentAnswer),
    workingSteps: stringOrEmpty(source.workingSteps),
    solveMode: stringOrEmpty(source.solveMode) || "show_work",
    aiFeedback: stringOrEmpty(source.aiFeedback),
    understandingScore: numberOrNull(source.understandingScore),
    confidence: numberOrNull(source.confidence),
    needsAttention: booleanOrNull(source.needsAttention),
    issueReason: stringOrEmpty(source.issueReason),
  };
}
/**
 * Validates normalized question generator input.
 * @param {{topic: string, difficulty: string, count: number}} input - Normalized input.
 * @throws {Error} With status 400 when topic or difficulty is empty, or count is not an integer in 1..25.
 */
function assertGeneratorInput(input) {
  const reject = (message) => {
    const failure = new Error(message);
    failure.status = 400;
    throw failure;
  };
  if (!input.topic) {
    reject("Topic is required.");
  }
  if (!input.difficulty) {
    reject("Difficulty is required.");
  }
  const requested = input.count;
  const isCountValid = Number.isInteger(requested) && requested >= 1 && requested <= 25;
  if (!isCountValid) {
    reject("Question count must be between 1 and 25.");
  }
}
/**
 * Validates an assignment before drafting student work: base checks plus a correct answer
 * and a worked solution on every question.
 * @param {object} input - Normalized assignment.
 * @throws {Error} With status 400 on the first failed check.
 */
function assertAssignmentForStudentDraft(input) {
  const reject = (message) => {
    const failure = new Error(message);
    failure.status = 400;
    throw failure;
  };
  assertAssignmentBase(input);
  input.questions.forEach((question) => {
    if (!question.correctAnswer) {
      reject(`Question ${question.position} is missing a correct answer.`);
    }
    if (!question.workedSolution) {
      reject(`Question ${question.position} is missing a worked solution.`);
    }
  });
}
/**
 * Validates an assignment before drafting a teacher review: base checks plus some student
 * work (an answer or working steps) on every question.
 * @param {object} input - Normalized assignment.
 * @throws {Error} With status 400 on the first failed check.
 */
function assertAssignmentForTeacherDraft(input) {
  assertAssignmentBase(input);
  input.questions.forEach((question) => {
    const hasStudentWork = Boolean(question.studentAnswer || question.workingSteps);
    if (hasStudentWork) {
      return;
    }
    const failure = new Error(`Question ${question.position} needs student work before teacher review can be drafted.`);
    failure.status = 400;
    throw failure;
  });
}
/**
 * Runs the checks shared by every assignment route: title, student id, at least one
 * question, and a positive integer id plus a prompt on each question.
 * @param {object} input - Normalized assignment.
 * @throws {Error} With status 400 on the first failed check.
 */
function assertAssignmentBase(input) {
  const reject = (message) => {
    const failure = new Error(message);
    failure.status = 400;
    throw failure;
  };
  if (!input.assignmentTitle) {
    reject("Assignment title is required.");
  }
  if (!input.studentId) {
    reject("Student ID is required.");
  }
  const hasQuestions = Array.isArray(input.questions) && input.questions.length !== 0;
  if (!hasQuestions) {
    reject("At least one question is required.");
  }
  input.questions.forEach((question) => {
    const hasValidId = Number.isInteger(question.questionId) && question.questionId >= 1;
    if (!hasValidId) {
      reject(`Question ${question.position} needs a valid question ID.`);
    }
    if (!question.prompt) {
      reject(`Question ${question.position} is missing a prompt.`);
    }
  });
}
/**
 * Calls the question generator backend.
 * @param {{topic: string, difficulty: string, count: number}} params - Generation request.
 * @returns {Promise<*>} The backend's parsed JSON response.
 * @throws {Error} Carrying the backend's HTTP status and body text when the response is not ok.
 */
async function callQuestionGenerator({ topic, difficulty, count }) {
  const baseUrl = trimTrailingSlash(process.env.FINE_TUNE_BACKEND_URL);
  const requestInit = {
    method: "POST",
    headers: {
      "content-type": "application/json",
      authorization: `Bearer ${process.env.FINE_TUNE_BACKEND_TOKEN}`,
    },
    body: JSON.stringify({ topic, difficulty, count }),
  };
  const response = await fetch(`${baseUrl}/api/questions/generate`, requestInit);
  if (response.ok) {
    return response.json();
  }
  const errorBody = await response.text();
  const failure = new Error(`Question generator failed (${response.status}): ${errorBody}`);
  failure.status = response.status;
  throw failure;
}
/**
 * Converts one generator result into an assignment question with blank student and review fields.
 * @param {object} generated - Generator item; reads `question`, `worked_solution` and `tags`.
 * @param {number} index - Zero-based index, used to derive the 1-based position.
 * @returns {object} Assignment question record.
 */
function mapGeneratedQuestion(generated, index) {
  const details = generated?.question || {};
  const solutionSteps = Array.isArray(generated?.worked_solution) ? generated.worked_solution : [];
  const rawTags = Array.isArray(generated?.tags) ? generated.tags : [];
  // Keep only non-blank string tags.
  const tags = rawTags.filter((tag) => typeof tag === "string" && tag.trim());
  const questionFields = {
    questionId: integerOrNull(details.id),
    position: index + 1,
    title: stringOrEmpty(details.title),
    prompt: stringOrEmpty(details.prompt),
    subject: stringOrEmpty(details.subject) || "Mathematics",
    source: stringOrEmpty(details.source),
    difficulty: stringOrEmpty(details.difficulty),
    correctAnswer: stringOrEmpty(details.correct_answer),
    workedSolution: solutionSteps.join("\n"),
    tags,
  };
  const blankSubmission = {
    studentAnswer: "",
    workingSteps: "",
    solveMode: "show_work",
  };
  const blankReview = {
    aiFeedback: "",
    understandingScore: null,
    confidence: null,
    needsAttention: null,
    issueReason: "",
  };
  return { ...questionFields, ...blankSubmission, ...blankReview };
}
/**
 * Builds the user prompt for drafting student work: assignment header lines followed by one
 * section per question, separated by blank lines.
 * @param {object} input - Normalized assignment.
 * @returns {string} Prompt text.
 */
function buildStudentDraftPrompt(input) {
  const thresholdText = typeof input.passThreshold === "number" ? input.passThreshold : "Not set";
  const sections = [
    `Assignment ID: ${input.assignmentId || "draft-assignment"}`,
    `Assignment title: ${input.assignmentTitle}`,
    `Instructions: ${input.instructions || "No extra instructions."}`,
    `Student ID: ${input.studentId}`,
    `Pass threshold: ${thresholdText}`,
    "Generate a realistic student submission for every question below.",
  ];
  for (const question of input.questions) {
    const lines = [];
    lines.push(`Question ${question.position}`);
    lines.push(`questionId: ${question.questionId}`);
    lines.push(`title: ${question.title || "Untitled"}`);
    lines.push(`prompt: ${question.prompt}`);
    lines.push(`subject: ${question.subject || "Mathematics"}`);
    // Question difficulty wins; the assignment-level difficulty is the fallback.
    lines.push(`difficulty: ${question.difficulty || input.difficulty || "Not specified"}`);
    lines.push(`tags: ${question.tags.join(", ") || "None"}`);
    lines.push(`correctAnswer: ${question.correctAnswer}`);
    lines.push(`workedSolution: ${question.workedSolution}`);
    lines.push("Return answerText, workingSteps, and solveMode for this question.");
    sections.push(lines.join("\n"));
  }
  return sections.join("\n\n");
}
/**
 * Builds the user prompt for drafting a teacher review: assignment header lines followed by
 * one section per question (including the student's work), separated by blank lines.
 * @param {object} input - Normalized assignment.
 * @returns {string} Prompt text.
 */
function buildTeacherDraftPrompt(input) {
  const thresholdText = typeof input.passThreshold === "number" ? input.passThreshold : "Not set";
  const sections = [
    `Assignment ID: ${input.assignmentId || "draft-assignment"}`,
    `Assignment title: ${input.assignmentTitle}`,
    `Instructions: ${input.instructions || "No extra instructions."}`,
    `Student ID: ${input.studentId}`,
    `Pass threshold: ${thresholdText}`,
    "Review the full assignment in one pass. Return every question review plus an assignmentSummary and recommendedNextStep.",
  ];
  for (const question of input.questions) {
    const lines = [];
    lines.push(`Question ${question.position}`);
    lines.push(`questionId: ${question.questionId}`);
    lines.push(`title: ${question.title || "Untitled"}`);
    lines.push(`prompt: ${question.prompt}`);
    lines.push(`subject: ${question.subject || "Mathematics"}`);
    lines.push(`source: ${question.source || "rng_generated"}`);
    lines.push(`correctAnswer: ${question.correctAnswer}`);
    lines.push(`questionTags: ${question.tags.join(", ") || "None"}`);
    lines.push(`solveMode: ${question.solveMode || "show_work"}`);
    lines.push(`answerText: ${question.studentAnswer || "No answer provided."}`);
    lines.push(`workingSteps: ${question.workingSteps || "No working shown."}`);
    // Derived hints computed from the submitted work.
    lines.push(`answerStatus: ${deriveAnswerStatus(question)}`);
    lines.push(`isCorrect: ${deriveIsCorrect(question)}`);
    sections.push(lines.join("\n"));
  }
  return sections.join("\n\n");
}
/**
 * Aligns AI-generated student drafts with the source questions, producing one entry per
 * source question. Drafts are matched by questionId first, then by array position.
 * @param {object[]} sourceQuestions - Normalized assignment questions.
 * @param {*} generatedQuestions - `questions` value from the AI response.
 * @returns {object[]} Drafts with questionId, answerText, workingSteps and solveMode.
 */
function normalizeStudentDraftQuestions(sourceQuestions, generatedQuestions) {
  const draftsById = new Map();
  const generatedList = Array.isArray(generatedQuestions) ? generatedQuestions : [];
  generatedList.forEach((item) => {
    const id = integerOrNull(item?.questionId);
    if (id) {
      draftsById.set(id, item);
    }
  });
  const normalized = [];
  sourceQuestions.forEach((question, position) => {
    const draft = draftsById.get(question.questionId) || generatedQuestions?.[position] || {};
    normalized.push({
      questionId: question.questionId,
      answerText: stringOrEmpty(draft.answerText),
      workingSteps: stringOrEmpty(draft.workingSteps),
      solveMode: stringOrEmpty(draft.solveMode) || question.solveMode || "show_work",
    });
  });
  return normalized;
}
/**
 * Aligns AI-generated reviews with the source questions, producing one entry per source
 * question. Reviews are matched by questionId first, then by array position.
 * @param {object[]} sourceQuestions - Normalized assignment questions.
 * @param {*} generatedQuestions - `questions` value from the AI response.
 * @returns {object[]} Reviews with questionId, aiFeedback, understandingScore, confidence,
 *   needsAttention and issueReason.
 */
function normalizeTeacherDraftQuestions(sourceQuestions, generatedQuestions) {
  const reviewsById = new Map();
  const generatedList = Array.isArray(generatedQuestions) ? generatedQuestions : [];
  generatedList.forEach((item) => {
    const id = integerOrNull(item?.questionId);
    if (id) {
      reviewsById.set(id, item);
    }
  });
  const normalized = [];
  sourceQuestions.forEach((question, position) => {
    const review = reviewsById.get(question.questionId) || generatedQuestions?.[position] || {};
    normalized.push({
      questionId: question.questionId,
      aiFeedback: stringOrEmpty(review.aiFeedback),
      understandingScore: clampScore(review.understandingScore),
      confidence: clampScore(review.confidence),
      needsAttention: booleanOrDefault(review.needsAttention, null),
      issueReason: stringOrEmpty(review.issueReason),
    });
  });
  return normalized;
}
/**
 * Classifies a question by whether the student supplied anything.
 * @param {{studentAnswer: string, workingSteps: string}} question - Normalized question.
 * @returns {string} "submitted" when an answer or working steps exist, otherwise "unanswered".
 */
function deriveAnswerStatus(question) {
  const hasWork = Boolean(question.studentAnswer || question.workingSteps);
  return hasWork ? "submitted" : "unanswered";
}
/**
 * Compares the student's answer with the correct answer, ignoring case and whitespace runs.
 * @param {{studentAnswer: string, correctAnswer: string}} question - Normalized question.
 * @returns {boolean} False when there is no student answer; otherwise whether the normalized texts match.
 */
function deriveIsCorrect(question) {
  const { studentAnswer, correctAnswer } = question;
  if (studentAnswer) {
    return normalizeComparable(studentAnswer) === normalizeComparable(correctAnswer);
  }
  return false;
}
/**
 * Produces a comparison key: lower-cased, whitespace runs collapsed to one space, trimmed.
 * @param {*} value - Value to normalize; falsy values become the empty string.
 * @returns {string} Normalized text.
 */
function normalizeComparable(value) {
  const lowered = String(value || "").toLowerCase();
  // Splitting on whitespace runs and re-joining with a single space collapses each run.
  const collapsed = lowered.split(/\s+/).join(" ");
  return collapsed.trim();
}
/**
 * Returns a trimmed string, or the empty string for any non-string value.
 * @param {*} value - Value to normalize.
 * @returns {string} Trimmed text or "".
 */
function stringOrEmpty(value) {
  if (typeof value !== "string") {
    return "";
  }
  return value.trim();
}
/**
 * Normalizes a tag-like value into a list of non-empty trimmed strings.
 * @param {*} value - An array (non-string entries are dropped) or a comma-separated string.
 * @returns {string[]} Cleaned list; empty for any other input type.
 */
function stringArray(value) {
  if (typeof value === "string") {
    const pieces = value.split(",");
    return pieces.map((piece) => piece.trim()).filter(Boolean);
  }
  if (!Array.isArray(value)) {
    return [];
  }
  const cleaned = value.map((entry) => stringOrEmpty(entry));
  return cleaned.filter(Boolean);
}
/**
 * Parses a positive integer from a value using base-10 `parseInt` semantics
 * (a leading integer prefix is accepted, e.g. "12abc" -> 12).
 * @param {*} value - Value to parse; null/undefined are treated as empty.
 * @returns {(number|null)} The parsed integer when it is at least 1, otherwise null.
 */
function integerOrNull(value) {
  const text = String(value ?? "").trim();
  const parsed = Number.parseInt(text, 10);
  return Number.isInteger(parsed) && parsed >= 1 ? parsed : null;
}
/**
 * Parses a positive integer, substituting a fallback when parsing yields nothing.
 * @param {*} value - Value to parse.
 * @param {*} fallback - Returned when `value` is not a positive integer.
 * @returns {*} The parsed integer or the fallback.
 */
function integerOrDefault(value, fallback) {
  const candidate = integerOrNull(value);
  if (candidate === null || candidate === undefined) {
    return fallback;
  }
  return candidate;
}
/**
 * Converts a number or numeric string to a finite number.
 * @param {*} value - Value to convert.
 * @returns {(number|null)} The finite number, or null for anything else (null, undefined,
 *   blank strings, booleans, arrays, objects, NaN, Infinity).
 */
function numberOrNull(value) {
  // Number() coerces null, "", false and [] to 0, which would turn a missing value into
  // a real score or threshold of 0. Only numbers and non-blank strings are accepted.
  const isNumber = typeof value === "number";
  const isNonBlankString = typeof value === "string" && value.trim() !== "";
  if (!isNumber && !isNonBlankString) {
    return null;
  }
  const parsed = Number(value);
  return Number.isFinite(parsed) ? parsed : null;
}
/**
 * Converts a score to a number rounded to two decimals and clamped to the range [0, 1].
 * @param {*} value - Number or numeric string.
 * @returns {(number|null)} The clamped score, or null when the value is missing or not numeric.
 */
function clampScore(value) {
  // Number() coerces null, "", false and [] to 0, which would report a missing score as 0.
  // Only numbers and non-blank strings are treated as scores.
  const isNumber = typeof value === "number";
  const isNonBlankString = typeof value === "string" && value.trim() !== "";
  if (!isNumber && !isNonBlankString) {
    return null;
  }
  const parsed = Number(value);
  if (!Number.isFinite(parsed)) {
    return null;
  }
  const rounded = Number(parsed.toFixed(2));
  return Math.max(0, Math.min(1, rounded));
}
/**
 * Interprets a boolean or the strings "true"/"false".
 * @param {*} value - Value to interpret.
 * @returns {(boolean|null)} The boolean, or null for any other value.
 */
function booleanOrNull(value) {
  switch (value) {
    case true:
    case "true":
      return true;
    case false:
    case "false":
      return false;
    default:
      return null;
  }
}
/**
 * Interprets a boolean-like value, substituting a fallback when it is not recognizable.
 * @param {*} value - Value to interpret.
 * @param {*} fallback - Returned when `value` is neither a boolean nor "true"/"false".
 * @returns {*} The interpreted boolean or the fallback.
 */
function booleanOrDefault(value, fallback) {
  const interpreted = booleanOrNull(value);
  if (interpreted !== null) {
    return interpreted;
  }
  return fallback;
}
/**
 * Removes every trailing "/" from a value's string form.
 * @param {*} value - Value to trim; falsy values become the empty string.
 * @returns {string} Text without trailing slashes.
 */
function trimTrailingSlash(value) {
  const text = String(value || "");
  let end = text.length;
  while (end > 0 && text[end - 1] === "/") {
    end -= 1;
  }
  return text.slice(0, end);
}
/**
 * Sends a structured-output request to the configured AI endpoint and returns the parsed JSON.
 * @param {object} params
 * @param {string} params.systemPrompt - System instructions.
 * @param {string} params.userPrompt - User message.
 * @param {string} params.schemaName - Name attached to the JSON schema.
 * @param {object} params.schema - JSON schema the response must follow.
 * @returns {Promise<*>} The parsed JSON produced by the model.
 * @throws {Error} With the upstream HTTP status when the request fails, or with status 502
 *   when the response has no text output or the text is not valid JSON.
 */
async function callAiJson({ systemPrompt, userPrompt, schemaName, schema }) {
  const endpoint = process.env.FINE_TUNE_AI_ENDPOINT;
  const model = process.env.FINE_TUNE_AI_MODEL;
  const response = await fetch(endpoint, {
    method: "POST",
    headers: buildHeaders(endpoint),
    body: JSON.stringify(buildRequestBody({ endpoint, model, systemPrompt, userPrompt, schemaName, schema })),
  });
  if (!response.ok) {
    const errorBody = await response.text();
    const error = new Error(`AI request failed (${response.status}): ${errorBody}`);
    error.status = response.status;
    throw error;
  }
  const payload = await response.json();
  // Chat-completions and responses-style endpoints return the text in different shapes.
  const rawText = isChatEndpoint(endpoint) ? extractChatText(payload) : extractResponsesText(payload);
  if (!rawText) {
    const error = new Error("AI response did not include usable text output.");
    error.status = 502;
    throw error;
  }
  try {
    return JSON.parse(rawText);
  } catch (cause) {
    // The upstream service answered, but not with JSON: report it as a bad gateway with a
    // clear message instead of leaking a raw SyntaxError as a 500.
    const error = new Error("AI response was not valid JSON.", { cause });
    error.status = 502;
    throw error;
  }
}
/**
 * Builds the HTTP headers for an AI request.
 * @param {string} endpoint - AI endpoint URL.
 * @returns {object} JSON content type plus an "api-key" header for Azure endpoints or a
 *   Bearer "authorization" header otherwise.
 */
function buildHeaders(endpoint) {
  const apiKey = process.env.FINE_TUNE_AI_API_KEY;
  const authHeader = isAzureEndpoint(endpoint)
    ? { "api-key": apiKey }
    : { authorization: `Bearer ${apiKey}` };
  return { "content-type": "application/json", ...authHeader };
}
/**
 * Builds the request body for the AI endpoint, in chat-completions form when the endpoint
 * is a chat endpoint and in `input`/`text.format` form otherwise.
 * @param {object} params
 * @param {string} params.endpoint - AI endpoint URL, used to pick the body shape.
 * @param {string} params.model - Model name.
 * @param {string} params.systemPrompt - System instructions.
 * @param {string} params.userPrompt - User message.
 * @param {string} params.schemaName - Name attached to the JSON schema.
 * @param {object} params.schema - JSON schema for the structured output.
 * @returns {object} Request body ready to be serialized.
 */
function buildRequestBody({ endpoint, model, systemPrompt, userPrompt, schemaName, schema }) {
  if (!isChatEndpoint(endpoint)) {
    const toInputMessage = (role, text) => ({
      role,
      content: [{ type: "input_text", text }],
    });
    return {
      model,
      input: [toInputMessage("system", systemPrompt), toInputMessage("user", userPrompt)],
      text: {
        format: {
          type: "json_schema",
          name: schemaName,
          strict: true,
          schema,
        },
      },
    };
  }

  const chatBody = {
    model,
    temperature: 0,
    messages: [
      { role: "system", content: systemPrompt },
      { role: "user", content: userPrompt },
    ],
    response_format: {
      type: "json_schema",
      json_schema: {
        name: schemaName,
        strict: true,
        schema,
      },
    },
  };
  // Only non-Azure chat endpoints receive the chat_template_kwargs extension.
  if (isAzureEndpoint(endpoint)) {
    return chatBody;
  }
  chatBody.chat_template_kwargs = { enable_thinking: false };
  return chatBody;
}
/**
 * Tells whether an endpoint URL contains "/chat/completions".
 * @param {*} endpoint - Endpoint URL.
 * @returns {boolean} False for non-strings.
 */
function isChatEndpoint(endpoint) {
  if (typeof endpoint !== "string") {
    return false;
  }
  return endpoint.indexOf("/chat/completions") !== -1;
}
/**
 * Tells whether an endpoint URL contains one of the Azure host markers.
 * @param {*} endpoint - Endpoint URL.
 * @returns {boolean} False for non-strings.
 */
function isAzureEndpoint(endpoint) {
  if (typeof endpoint !== "string") {
    return false;
  }
  const azureMarkers = ["cognitiveservices.azure.com", ".openai.azure.com"];
  return azureMarkers.some((marker) => endpoint.includes(marker));
}
/**
 * Pulls the text out of a chat-completions payload's first choice.
 * @param {*} payload - Parsed response body.
 * @returns {string} The content string as-is, or array content parts (strings or objects with
 *   a string `text`) joined with newlines; "" when there is no usable content.
 */
function extractChatText(payload) {
  const content = payload?.choices?.[0]?.message?.content;
  if (typeof content === "string") {
    return content;
  }
  if (!Array.isArray(content)) {
    return "";
  }
  const pieces = [];
  for (const part of content) {
    let text = "";
    if (typeof part === "string") {
      text = part;
    } else if (part && typeof part.text === "string") {
      text = part.text;
    }
    // Empty pieces are skipped so they do not add blank lines.
    if (text) {
      pieces.push(text);
    }
  }
  return pieces.join("\n");
}
/**
 * Pulls the model's text out of a responses-style payload.
 *
 * Lookup order:
 *   1. a non-blank top-level `output_text`;
 *   2. `output_text` content parts inside `output[].content[]`, concatenated;
 *   3. a breadth-first scan for the first non-blank `output_text` or `text` string.
 *
 * @param {*} payload - Parsed response body.
 * @returns {string} The extracted text, or "" when none is found.
 */
function extractResponsesText(payload) {
  if (typeof payload?.output_text === "string" && payload.output_text.trim()) {
    return payload.output_text;
  }

  // Read the structured output first. A plain scan returns whichever `text` it meets first,
  // which is a reasoning summary when a reasoning item precedes the message item.
  if (Array.isArray(payload?.output)) {
    const parts = [];
    for (const item of payload.output) {
      if (!Array.isArray(item?.content)) continue;
      for (const part of item.content) {
        if (part?.type === "output_text" && typeof part.text === "string") {
          parts.push(part.text);
        }
      }
    }
    const joined = parts.join("");
    if (joined.trim()) {
      return joined;
    }
  }

  // Fallback for unknown shapes. A cursor replaces queue.shift() so the scan stays linear.
  const queue = [payload];
  for (let cursor = 0; cursor < queue.length; cursor += 1) {
    const current = queue[cursor];
    if (!current || typeof current !== "object") continue;
    if (typeof current.output_text === "string" && current.output_text.trim()) return current.output_text;
    if (typeof current.text === "string" && current.text.trim()) return current.text;
    for (const value of Object.values(current)) {
      if (Array.isArray(value)) {
        for (const entry of value) queue.push(entry);
      } else if (value && typeof value === "object") {
        queue.push(value);
      }
    }
  }
  return "";
}
/**
 * Sends an error as a JSON response of the form { message }.
 * @param {{status: function(number): object}} res - Express-style response.
 * @param {*} error - Thrown value; `error.status` is used when it is a 4xx/5xx integer.
 */
function handleError(res, error) {
  // Upstream failures copy the remote HTTP status onto the error. Only genuine error codes
  // are forwarded: a non-error or out-of-range status would mislabel the failure or be
  // rejected by the HTTP layer.
  const candidate = error?.status;
  const isErrorStatus = Number.isInteger(candidate) && candidate >= 400 && candidate <= 599;
  const status = isErrorStatus ? candidate : 500;
  res.status(status).json({
    message: error instanceof Error ? error.message : "Unexpected error",
  });
}