Files
BoostAI/Backend/cmd/backfill_historical_reviews/main.go
2026-05-25 17:05:06 +01:00

561 lines
16 KiB
Go

package main
import (
"boostai-backend/internal/config"
"boostai-backend/internal/database"
"context"
"encoding/json"
"errors"
"fmt"
"log"
"math"
"os"
"path/filepath"
"sort"
"strings"
"time"
)
// historicalAssignmentIDs is the set of assignment IDs whose answers are
// rewritten by this backfill; answers for any other assignment are skipped.
var historicalAssignmentIDs = map[int64]bool{
	3001: true, 3002: true, 3003: true, 3004: true, 3005: true,
}
// assignmentRecord is one assignment as decoded from the mock data JSON.
type assignmentRecord struct {
	ID           int64  `json:"id"`
	Name         string `json:"name"`
	Status       string `json:"status"`
	MaximumMarks int    `json:"maximum_marks"`
}
// assigneeRecord links one student to one assignment, as decoded from the
// mock data JSON. Pointer fields are nil when the JSON value is absent or null.
type assigneeRecord struct {
	ID              int64    `json:"id"`
	AssignmentID    int64    `json:"assignment_id"`
	StudentID       int64    `json:"student_id"`
	Status          string   `json:"status"`
	OverallScore    *float64 `json:"overall_score"`
	AiFeedback      *string  `json:"ai_feedback"`
	NextStepOutcome *string  `json:"next_step_outcome"`
}
// assignmentQuestionRecord maps an assignment-question row to the question
// bank entry it refers to, as decoded from the mock data JSON.
type assignmentQuestionRecord struct {
	ID             int64 `json:"id"`
	AssignmentID   int64 `json:"assignment_id"`
	QuestionBankID int64 `json:"question_bank_id"`
}
// studentAnswerRecord is one student answer as decoded from the mock data
// JSON. Pointer fields are nil when the JSON value is absent or null.
//
// NOTE(review): several fields come in pairs that differ only by a leading
// underscore in the JSON key (solve_mode/_solve_mode, is_correct/_is_correct).
// Presumably the mock data uses both spellings - confirm against the data.
type studentAnswerRecord struct {
	AssigneeID           int64  `json:"assignee_id"`
	AssignmentQuestionID int64  `json:"assignment_question_id"`
	AnswerLatex          string `json:"answer_latex"`
	AiReasoning          string `json:"ai_reasoning"`

	AiFeedback       *string `json:"ai_feedback"`
	SolveMode        *string `json:"solve_mode"`
	UnderSolveMode   *string `json:"_solve_mode"`
	IsCorrect        *bool   `json:"is_correct"`
	UnderIsCorrect   *bool   `json:"_is_correct"`
	MisconceptionTag *string `json:"_misconception_tag"`
	QuestionTopic    *string `json:"_question_topic"`

	// Review fields that are already present in the data; when nil the
	// backfill derives a value instead.
	ReviewNeedsAttention     *bool    `json:"review_needs_attention"`
	ReviewIssueReason        *string  `json:"review_issue_reason"`
	ReviewUnderstandingScore *float64 `json:"review_understanding_score"`
	ReviewCorrectnessScore   *float64 `json:"review_correctness_score"`
	ReviewQuestionScore      *float64 `json:"review_question_score"`
	ReviewConfidence         *float64 `json:"review_confidence"`
	ReviewTags               []string `json:"review_tags"`

	// Timestamps are converted with time.UnixMilli by the caller.
	CreatedAt  int64  `json:"created_at"`
	AnsweredAt *int64 `json:"_answered_at"`
}
// questionReviewUpdate holds the review values computed for one student
// answer. The three IDs identify the student_answers row to update.
type questionReviewUpdate struct {
	AssignmentID, StudentID, QuestionID int64

	IsCorrect      bool
	AIFeedback     string
	NeedsAttention bool
	IssueReason    string

	CorrectnessScore, UnderstandingScore, QuestionScore, Confidence float64

	ReviewTags []string
	Topic      string

	// QuestionContribution is this answer's share of the assignee's overall
	// score before averaging.
	QuestionContribution float64

	// AnsweredAt is only meaningful when HasAnsweredAt is true.
	AnsweredAt    time.Time
	HasAnsweredAt bool
}
// assigneeSummary aggregates every question update for one
// (assignment, student) pair together with the derived overall result.
type assigneeSummary struct {
	AssignmentID, StudentID int64
	AssignmentName          string

	// OverallScore stays nil until finalizeAssigneeSummary has run on a
	// summary with at least one question update.
	OverallScore                *float64
	AIFeedback, NextStepOutcome string

	QuestionUpdates                 []questionReviewUpdate
	CorrectCount, NeedsAttentionCnt int
}
func main() {
cfg := config.Load()
db, err := database.NewPostgres(cfg.DatabaseURL)
if err != nil {
log.Fatalf("failed to connect to database: %v", err)
}
defer db.Close()
mockDataDir, err := resolveMockDataDir()
if err != nil {
log.Fatalf("failed to resolve mock data directory: %v", err)
}
assignments, err := readJSON[[]assignmentRecord](filepath.Join(mockDataDir, "assignments.json"))
if err != nil {
log.Fatalf("failed to read assignments.json: %v", err)
}
assignees, err := readJSON[[]assigneeRecord](filepath.Join(mockDataDir, "assignment_assignees.json"))
if err != nil {
log.Fatalf("failed to read assignment_assignees.json: %v", err)
}
assignmentQuestions, err := readJSON[[]assignmentQuestionRecord](filepath.Join(mockDataDir, "assignment_questions.json"))
if err != nil {
log.Fatalf("failed to read assignment_questions.json: %v", err)
}
studentAnswers, err := readJSON[[]studentAnswerRecord](filepath.Join(mockDataDir, "student_answers.json"))
if err != nil {
log.Fatalf("failed to read student_answers.json: %v", err)
}
assignmentByID := map[int64]assignmentRecord{}
for _, item := range assignments {
assignmentByID[item.ID] = item
}
assigneeByID := map[int64]assigneeRecord{}
for _, item := range assignees {
assigneeByID[item.ID] = item
}
questionIDByAssignmentQuestionID := map[int64]int64{}
for _, item := range assignmentQuestions {
questionIDByAssignmentQuestionID[item.ID] = item.QuestionBankID
}
summaries := map[string]*assigneeSummary{}
for _, row := range studentAnswers {
assignee, ok := assigneeByID[row.AssigneeID]
if !ok || !historicalAssignmentIDs[assignee.AssignmentID] {
continue
}
questionID, ok := questionIDByAssignmentQuestionID[row.AssignmentQuestionID]
if !ok {
continue
}
assignment := assignmentByID[assignee.AssignmentID]
key := fmt.Sprintf("%d:%d", assignee.AssignmentID, assignee.StudentID)
summary := summaries[key]
if summary == nil {
summary = &assigneeSummary{
AssignmentID: assignee.AssignmentID,
StudentID: assignee.StudentID,
AssignmentName: assignment.Name,
}
summaries[key] = summary
}
update := buildQuestionReviewUpdate(assignee, questionID, row)
summary.QuestionUpdates = append(summary.QuestionUpdates, update)
if update.IsCorrect {
summary.CorrectCount++
}
if update.NeedsAttention {
summary.NeedsAttentionCnt++
}
}
for _, summary := range summaries {
finalizeAssigneeSummary(summary)
}
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
tx, err := db.Pool.Begin(ctx)
if err != nil {
log.Fatalf("failed to begin transaction: %v", err)
}
defer tx.Rollback(ctx)
updatedAnswers := 0
updatedAssignees := 0
for _, summary := range summaries {
for _, update := range summary.QuestionUpdates {
_, err := tx.Exec(ctx, `
UPDATE student_answers
SET is_correct = $4,
ai_feedback = $5,
review_needs_attention = $6,
review_issue_reason = $7,
review_correctness_score = $8,
review_understanding_score = $9,
review_question_score = $10,
review_confidence = $11,
review_tags = $12,
updated_at = NOW()
WHERE assignment_id = $1
AND student_id = $2
AND question_id = $3
`,
update.AssignmentID,
update.StudentID,
update.QuestionID,
update.IsCorrect,
nullableString(update.AIFeedback),
update.NeedsAttention,
nullableString(update.IssueReason),
update.CorrectnessScore,
update.UnderstandingScore,
update.QuestionScore,
update.Confidence,
update.ReviewTags,
)
if err != nil {
log.Fatalf("failed to update student answer (%d/%d/%d): %v", update.AssignmentID, update.StudentID, update.QuestionID, err)
}
updatedAnswers++
}
var overall any
var passStatus string
if summary.OverallScore == nil {
passStatus = "pending"
overall = nil
} else {
overall = *summary.OverallScore
if *summary.OverallScore >= 6.0 {
passStatus = "pass"
} else {
passStatus = "no_pass"
}
}
_, err := tx.Exec(ctx, `
UPDATE assignment_assignees
SET overall_score = $3,
ai_feedback = $4,
pass_status = $5,
pass_status_override = NULL
WHERE assignment_id = $1
AND student_id = $2
`, summary.AssignmentID, summary.StudentID, overall, nullableString(summary.AIFeedback), passStatus)
if err != nil {
log.Fatalf("failed to update assignment assignee (%d/%d): %v", summary.AssignmentID, summary.StudentID, err)
}
updatedAssignees++
}
if err := tx.Commit(ctx); err != nil {
log.Fatalf("failed to commit transaction: %v", err)
}
log.Printf("historical review backfill complete: %d answers updated, %d assignees updated", updatedAnswers, updatedAssignees)
}
// buildQuestionReviewUpdate computes the review fields for one student answer.
//
// Values already present on row win; anything missing is derived:
//   - correctness comes from is_correct, then _is_correct, then from whether
//     the AI reasoning text starts with "incorrect" (case-insensitive);
//   - understanding and confidence come from the correctness/solve-mode tables;
//   - the correctness score falls back to 1 or 0 according to isCorrect, and
//     the question score falls back to the question contribution, so an
//     incorrect answer without stored scores is no longer given full marks.
//
// The returned scores are rounded to three decimals; QuestionContribution is
// left unrounded because it is averaged later.
func buildQuestionReviewUpdate(assignee assigneeRecord, questionID int64, row studentAnswerRecord) questionReviewUpdate {
	var isCorrect bool
	switch {
	case row.IsCorrect != nil:
		isCorrect = *row.IsCorrect
	case row.UnderIsCorrect != nil:
		isCorrect = *row.UnderIsCorrect
	default:
		// No explicit flag: treat the answer as correct unless the reasoning
		// text opens with "incorrect".
		isCorrect = !strings.HasPrefix(strings.ToLower(strings.TrimSpace(row.AiReasoning)), "incorrect")
	}

	solveMode := firstNonEmptyString(row.SolveMode, row.UnderSolveMode)
	understanding := valueOrElse(row.ReviewUnderstandingScore, deriveUnderstandingScore(isCorrect, solveMode))
	confidence := valueOrElse(row.ReviewConfidence, deriveConfidenceScore(isCorrect, solveMode))
	needsAttention := boolOrElse(row.ReviewNeedsAttention, !isCorrect || understanding < 0.72)
	issueReason := stringOrElse(row.ReviewIssueReason, deriveIssueReason(row.AiReasoning, row.MisconceptionTag))
	aiFeedback := stringOrElse(row.AiFeedback, row.AiReasoning)
	reviewTags := sanitizeTags(row.ReviewTags, row.MisconceptionTag)

	// Equal-weight blend of correctness and understanding, in [0, 1] when
	// understanding is.
	questionContribution := (boolToFloat(isCorrect) + understanding) / 2
	correctness := valueOrElse(row.ReviewCorrectnessScore, boolToFloat(isCorrect))
	questionScore := valueOrElse(row.ReviewQuestionScore, questionContribution)

	// Prefer the explicit answer timestamp, then the creation timestamp; both
	// are interpreted as Unix milliseconds.
	var answeredAt time.Time
	var hasAnsweredAt bool
	switch {
	case row.AnsweredAt != nil && *row.AnsweredAt > 0:
		answeredAt = time.UnixMilli(*row.AnsweredAt).UTC()
		hasAnsweredAt = true
	case row.CreatedAt > 0:
		answeredAt = time.UnixMilli(row.CreatedAt).UTC()
		hasAnsweredAt = true
	}

	return questionReviewUpdate{
		AssignmentID:         assignee.AssignmentID,
		StudentID:            assignee.StudentID,
		QuestionID:           questionID,
		IsCorrect:            isCorrect,
		AIFeedback:           aiFeedback,
		NeedsAttention:       needsAttention,
		IssueReason:          issueReason,
		CorrectnessScore:     roundToThree(correctness),
		UnderstandingScore:   roundToThree(understanding),
		QuestionScore:        roundToThree(questionScore),
		Confidence:           roundToThree(confidence),
		ReviewTags:           reviewTags,
		Topic:                stringOrElse(row.QuestionTopic, "general"),
		QuestionContribution: questionContribution,
		AnsweredAt:           answeredAt,
		HasAnsweredAt:        hasAnsweredAt,
	}
}
// finalizeAssigneeSummary fills in the aggregate fields of summary from its
// question updates: the overall score out of 10 (rounded to two decimals),
// the next-step outcome and a generated feedback sentence that names up to
// two weakest topics. A summary without question updates is left untouched.
func finalizeAssigneeSummary(summary *assigneeSummary) {
	questionCount := len(summary.QuestionUpdates)
	if questionCount == 0 {
		return
	}

	var contributionSum float64
	byTopic := map[string][]float64{}
	for i := range summary.QuestionUpdates {
		q := &summary.QuestionUpdates[i]
		contributionSum += q.QuestionContribution
		byTopic[q.Topic] = append(byTopic[q.Topic], q.QuestionContribution)
	}
	overall := roundToTwo(contributionSum / float64(questionCount) * 10)
	summary.OverallScore = &overall

	type topicMean struct {
		topic string
		mean  float64
	}
	ranked := make([]topicMean, 0, len(byTopic))
	for topic, values := range byTopic {
		var sum float64
		for _, v := range values {
			sum += v
		}
		ranked = append(ranked, topicMean{topic: topic, mean: sum / float64(len(values))})
	}
	// Lowest mean first; ties broken by topic name so the result does not
	// depend on map iteration order.
	sort.Slice(ranked, func(a, b int) bool {
		if ranked[a].mean != ranked[b].mean {
			return ranked[a].mean < ranked[b].mean
		}
		return ranked[a].topic < ranked[b].topic
	})
	if len(ranked) > 2 {
		ranked = ranked[:2]
	}

	labels := make([]string, len(ranked))
	for i, entry := range ranked {
		labels[i] = displayTopic(entry.topic)
	}
	weakestTopicText := "general fluency"
	if len(labels) > 0 {
		weakestTopicText = strings.Join(labels, ", ")
	}

	switch {
	case overall < 4.5:
		summary.NextStepOutcome = "redo"
	case overall < 6.0:
		summary.NextStepOutcome = "support"
	default:
		summary.NextStepOutcome = "accept"
	}

	summary.AIFeedback = fmt.Sprintf(
		"Student completed %s with %d/%d correct responses. Overall score is %.2f/10. The weakest areas were %s. %d question(s) need extra attention.",
		summary.AssignmentName,
		summary.CorrectCount,
		questionCount,
		overall,
		weakestTopicText,
		summary.NeedsAttentionCnt,
	)
}
// deriveUnderstandingScore returns the default understanding score for an
// answer from its correctness and solve mode. solveMode is trimmed first; an
// empty or unrecognised mode is scored as "just_answer" instead of falling
// through to a zero score.
func deriveUnderstandingScore(isCorrect bool, solveMode string) float64 {
	mode := strings.TrimSpace(solveMode)
	if isCorrect {
		switch mode {
		case "step_by_step":
			return 0.95
		case "handwritten":
			return 0.85
		case "solve_together":
			return 0.65
		default: // "just_answer", empty, or unknown
			return 0.75
		}
	}
	switch mode {
	case "step_by_step":
		return 0.40
	case "handwritten":
		return 0.32
	case "solve_together":
		return 0.28
	default: // "just_answer", empty, or unknown
		return 0.20
	}
}
// deriveConfidenceScore returns the default review confidence for an answer
// from its correctness and solve mode. solveMode is trimmed first; an empty
// or unrecognised mode is scored as "just_answer" instead of falling through
// to a zero score.
func deriveConfidenceScore(isCorrect bool, solveMode string) float64 {
	mode := strings.TrimSpace(solveMode)
	if isCorrect {
		switch mode {
		case "step_by_step":
			return 0.82
		case "handwritten":
			return 0.78
		case "solve_together":
			return 0.62
		default: // "just_answer", empty, or unknown
			return 0.90
		}
	}
	switch mode {
	case "step_by_step":
		return 0.55
	case "handwritten":
		return 0.60
	case "solve_together":
		return 0.50
	default: // "just_answer", empty, or unknown
		return 0.72
	}
}
// deriveIssueReason produces a human-readable explanation of what went wrong
// with an answer. A non-blank misconception tag takes precedence: known tags
// map to a fixed sentence, unknown tags are returned trimmed. Without a tag
// the trimmed AI reasoning is used, or a generic sentence when that is blank.
func deriveIssueReason(aiReasoning string, misconception *string) string {
	if misconception != nil {
		if tag := strings.TrimSpace(*misconception); tag != "" {
			explanations := map[string]string{
				"add_tops_add_bottoms":         "The student added the numerator and denominator directly instead of finding a common denominator.",
				"fraction_op_confusion":        "The student confused the fraction operation and did not apply the correct method.",
				"fraction_general_uncertainty": "The student shows insecure understanding of equivalent or comparable fractions.",
				"place_value_misalignment":     "The student misread place value, causing digits to be aligned incorrectly.",
				"arithmetic_slip":              "The final answer is wrong, suggesting a careless arithmetic slip rather than a secure method.",
				"scaffolding_dependence":       "The student appears dependent on scaffolding and does not show secure independent understanding.",
				"word_problem_interpretation":  "The student did not translate the word problem into the correct calculation.",
			}
			if sentence, known := explanations[tag]; known {
				return sentence
			}
			return tag
		}
	}

	if reasoning := strings.TrimSpace(aiReasoning); reasoning != "" {
		return reasoning
	}
	return "The answer shows incomplete understanding of the method."
}
// defaultSolveMode returns value with surrounding whitespace removed, or
// "just_answer" when nothing remains.
func defaultSolveMode(value string) string {
	if trimmed := strings.TrimSpace(value); trimmed != "" {
		return trimmed
	}
	return "just_answer"
}
// displayTopic turns a snake_case topic key into a display label: underscores
// become spaces, runs of whitespace collapse, and the first character of each
// word is upper-cased. The split is made on the first rune boundary rather
// than the first byte, so words that start with a multi-byte character are
// not corrupted.
func displayTopic(value string) string {
	words := strings.Fields(strings.ReplaceAll(value, "_", " "))
	for i, word := range words {
		head, tail := word, ""
		// Ranging over a string yields rune start offsets; the first offset
		// after 0 is where the second rune begins.
		for offset := range word {
			if offset > 0 {
				head, tail = word[:offset], word[offset:]
				break
			}
		}
		words[i] = strings.ToUpper(head) + tail
	}
	return strings.Join(words, " ")
}
// boolToFloat maps true to 1 and false to 0.
func boolToFloat(value bool) float64 {
	var out float64
	if value {
		out = 1
	}
	return out
}
// roundToTwo rounds value to two decimal places using math.Round.
func roundToTwo(value float64) float64 {
	const scale = 100
	scaled := math.Round(value * scale)
	return scaled / scale
}
// roundToThree rounds value to three decimal places using math.Round.
func roundToThree(value float64) float64 {
	const scale = 1000
	scaled := math.Round(value * scale)
	return scaled / scale
}
// readJSON reads the file at path and decodes its JSON content into a T.
// On a read or decode error it returns the zero value of T together with the
// underlying error.
func readJSON[T any](path string) (T, error) {
	var decoded T
	raw, err := os.ReadFile(path)
	if err != nil {
		return decoded, err
	}
	if err = json.Unmarshal(raw, &decoded); err != nil {
		// Discard anything a partial decode may have written.
		var empty T
		return empty, err
	}
	return decoded, nil
}
// resolveMockDataDir locates the directory holding the mock data JSON files.
// A non-blank MOCK_DATA_DIR environment variable is returned as-is (trimmed,
// without checking that it exists). Otherwise the first existing directory
// among ../Mock-Data, ./Mock-Data and ../../Mock-Data, relative to the
// working directory, is returned.
func resolveMockDataDir() (string, error) {
	override := strings.TrimSpace(os.Getenv("MOCK_DATA_DIR"))
	if override != "" {
		return override, nil
	}

	for _, dir := range []string{
		filepath.Join("..", "Mock-Data"),
		filepath.Join(".", "Mock-Data"),
		filepath.Join("..", "..", "Mock-Data"),
	} {
		info, statErr := os.Stat(dir)
		if statErr != nil || !info.IsDir() {
			continue
		}
		return dir, nil
	}
	return "", errors.New("Mock-Data directory not found; set MOCK_DATA_DIR")
}
// valueOrElse dereferences value, or returns fallback when value is nil.
func valueOrElse(value *float64, fallback float64) float64 {
	if value == nil {
		return fallback
	}
	return *value
}
// boolOrElse dereferences value, or returns fallback when value is nil.
func boolOrElse(value *bool, fallback bool) bool {
	if value == nil {
		return fallback
	}
	return *value
}
// stringOrElse returns the trimmed *value when it is non-nil and not blank;
// otherwise it returns the trimmed fallback.
func stringOrElse(value *string, fallback string) string {
	if value != nil {
		if trimmed := strings.TrimSpace(*value); trimmed != "" {
			return trimmed
		}
	}
	return strings.TrimSpace(fallback)
}
// firstNonEmptyString returns, trimmed, the first argument that is non-nil
// and not blank, or "" when there is none.
func firstNonEmptyString(values ...*string) string {
	for i := 0; i < len(values); i++ {
		candidate := values[i]
		if candidate == nil {
			continue
		}
		if trimmed := strings.TrimSpace(*candidate); trimmed != "" {
			return trimmed
		}
	}
	return ""
}
// sanitizeTags returns tags trimmed, with blanks and duplicates removed and
// first-seen order kept. A non-blank misconception that is not already among
// the tags is appended last. The result is never nil.
func sanitizeTags(tags []string, misconception *string) []string {
	cleaned := make([]string, 0, len(tags)+1)
	known := map[string]bool{}
	for i := range tags {
		candidate := strings.TrimSpace(tags[i])
		if candidate != "" && !known[candidate] {
			known[candidate] = true
			cleaned = append(cleaned, candidate)
		}
	}
	if misconception == nil {
		return cleaned
	}
	if extra := strings.TrimSpace(*misconception); extra != "" && !known[extra] {
		cleaned = append(cleaned, extra)
	}
	return cleaned
}
// nullableString returns the trimmed value for use as a query argument, or
// an untyped nil when the value is blank.
func nullableString(value string) any {
	if trimmed := strings.TrimSpace(value); trimmed != "" {
		return trimmed
	}
	return nil
}