// BoostAI/Backend/cmd/backfill_historical_reviews/main.go

package main

import (
	"boostai-backend/internal/config"
	"boostai-backend/internal/database"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"log"
	"math"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"time"
)

// historicalAssignmentIDs is the set of assignment IDs covered by this
// backfill. main skips every student answer whose assignee belongs to an
// assignment that is not listed here.
var historicalAssignmentIDs = map[int64]bool{
	3001: true,
	3002: true,
	3003: true,
	3004: true,
	3005: true,
}

// assignmentRecord is one entry decoded from assignments.json.
// Within this file only ID and Name are read: ID keys the lookup map built in
// main and Name is copied into the generated assignee feedback.
type assignmentRecord struct {
	ID           int64  `json:"id"`
	Name         string `json:"name"`
	Status       string `json:"status"`
	MaximumMarks int    `json:"maximum_marks"`
}

// assigneeRecord is one entry decoded from assignment_assignees.json.
// Its ID is what student answers reference (assignee_id); AssignmentID and
// StudentID identify the database rows that the backfill updates.
// Pointer fields are nil when the JSON key is absent or null.
type assigneeRecord struct {
	ID              int64    `json:"id"`
	AssignmentID    int64    `json:"assignment_id"`
	StudentID       int64    `json:"student_id"`
	Status          string   `json:"status"`
	OverallScore    *float64 `json:"overall_score"`
	AiFeedback      *string  `json:"ai_feedback"`
	NextStepOutcome *string  `json:"next_step_outcome"`
}

// assignmentQuestionRecord is one entry decoded from assignment_questions.json.
// main uses it to translate a student answer's assignment_question_id into the
// question bank ID that is matched against student_answers.question_id.
type assignmentQuestionRecord struct {
	ID             int64 `json:"id"`
	AssignmentID   int64 `json:"assignment_id"`
	QuestionBankID int64 `json:"question_bank_id"`
}

// studentAnswerRecord is one entry decoded from student_answers.json.
//
// Pointer fields are nil when the JSON key is absent or null, which lets
// buildQuestionReviewUpdate tell "not provided" apart from a zero value.
// Some values can arrive under two keys, a plain one and an
// underscore-prefixed one (solve_mode/_solve_mode, is_correct/_is_correct);
// buildQuestionReviewUpdate prefers the plain key when both are present.
// CreatedAt and AnsweredAt are converted with time.UnixMilli, i.e. they are
// treated as Unix timestamps in milliseconds.
type studentAnswerRecord struct {
	AssigneeID                int64    `json:"assignee_id"`
	AssignmentQuestionID      int64    `json:"assignment_question_id"`
	AnswerLatex               string   `json:"answer_latex"`
	AiReasoning               string   `json:"ai_reasoning"`
	AiFeedback                *string  `json:"ai_feedback"`
	SolveMode                 *string  `json:"solve_mode"`
	UnderSolveMode            *string  `json:"_solve_mode"`
	IsCorrect                 *bool    `json:"is_correct"`
	UnderIsCorrect            *bool    `json:"_is_correct"`
	MisconceptionTag          *string  `json:"_misconception_tag"`
	QuestionTopic             *string  `json:"_question_topic"`
	ReviewNeedsAttention      *bool    `json:"review_needs_attention"`
	ReviewIssueReason         *string  `json:"review_issue_reason"`
	ReviewUnderstandingScore  *float64 `json:"review_understanding_score"`
	ReviewCorrectnessScore    *float64 `json:"review_correctness_score"`
	ReviewQuestionScore       *float64 `json:"review_question_score"`
	ReviewConfidence          *float64 `json:"review_confidence"`
	ReviewTags                []string `json:"review_tags"`
	CreatedAt                 int64    `json:"created_at"`
	AnsweredAt                *int64   `json:"_answered_at"`
}

// questionReviewUpdate holds the review values computed for one student
// answer. AssignmentID, StudentID and QuestionID select the student_answers
// row; the correctness, feedback, attention, score, confidence and tag fields
// are the values main writes to it.
//
// Topic and QuestionContribution feed the per-assignee aggregation in
// finalizeAssigneeSummary. AnsweredAt and HasAnsweredAt are populated by
// buildQuestionReviewUpdate but are not read anywhere else in this file.
type questionReviewUpdate struct {
	AssignmentID            int64
	StudentID               int64
	QuestionID              int64
	IsCorrect               bool
	AIFeedback              string
	NeedsAttention          bool
	IssueReason             string
	CorrectnessScore        float64
	UnderstandingScore      float64
	QuestionScore           float64
	Confidence              float64
	ReviewTags              []string
	Topic                   string
	QuestionContribution    float64
	AnsweredAt              time.Time
	HasAnsweredAt           bool
}

// assigneeSummary accumulates everything the backfill knows about one
// (assignment, student) pair: the per-question updates plus running counts of
// correct answers and answers needing attention.
//
// OverallScore, AIFeedback and NextStepOutcome are filled in by
// finalizeAssigneeSummary; OverallScore stays nil when there are no question
// updates. NextStepOutcome is computed but not written to the database by the
// UPDATE statement in main.
type assigneeSummary struct {
	AssignmentID      int64
	StudentID         int64
	AssignmentName    string
	OverallScore      *float64
	AIFeedback        string
	NextStepOutcome   string
	QuestionUpdates   []questionReviewUpdate
	CorrectCount      int
	NeedsAttentionCnt int
}

func main() {
	cfg := config.Load()
	db, err := database.NewPostgres(cfg.DatabaseURL)
	if err != nil {
		log.Fatalf("failed to connect to database: %v", err)
	}
	defer db.Close()

	mockDataDir, err := resolveMockDataDir()
	if err != nil {
		log.Fatalf("failed to resolve mock data directory: %v", err)
	}

	assignments, err := readJSON[[]assignmentRecord](filepath.Join(mockDataDir, "assignments.json"))
	if err != nil {
		log.Fatalf("failed to read assignments.json: %v", err)
	}
	assignees, err := readJSON[[]assigneeRecord](filepath.Join(mockDataDir, "assignment_assignees.json"))
	if err != nil {
		log.Fatalf("failed to read assignment_assignees.json: %v", err)
	}
	assignmentQuestions, err := readJSON[[]assignmentQuestionRecord](filepath.Join(mockDataDir, "assignment_questions.json"))
	if err != nil {
		log.Fatalf("failed to read assignment_questions.json: %v", err)
	}
	studentAnswers, err := readJSON[[]studentAnswerRecord](filepath.Join(mockDataDir, "student_answers.json"))
	if err != nil {
		log.Fatalf("failed to read student_answers.json: %v", err)
	}

	assignmentByID := map[int64]assignmentRecord{}
	for _, item := range assignments {
		assignmentByID[item.ID] = item
	}

	assigneeByID := map[int64]assigneeRecord{}
	for _, item := range assignees {
		assigneeByID[item.ID] = item
	}

	questionIDByAssignmentQuestionID := map[int64]int64{}
	for _, item := range assignmentQuestions {
		questionIDByAssignmentQuestionID[item.ID] = item.QuestionBankID
	}

	summaries := map[string]*assigneeSummary{}
	for _, row := range studentAnswers {
		assignee, ok := assigneeByID[row.AssigneeID]
		if !ok || !historicalAssignmentIDs[assignee.AssignmentID] {
			continue
		}
		questionID, ok := questionIDByAssignmentQuestionID[row.AssignmentQuestionID]
		if !ok {
			continue
		}
		assignment := assignmentByID[assignee.AssignmentID]
		key := fmt.Sprintf("%d:%d", assignee.AssignmentID, assignee.StudentID)
		summary := summaries[key]
		if summary == nil {
			summary = &assigneeSummary{
				AssignmentID:   assignee.AssignmentID,
				StudentID:      assignee.StudentID,
				AssignmentName: assignment.Name,
			}
			summaries[key] = summary
		}

		update := buildQuestionReviewUpdate(assignee, questionID, row)
		summary.QuestionUpdates = append(summary.QuestionUpdates, update)
		if update.IsCorrect {
			summary.CorrectCount++
		}
		if update.NeedsAttention {
			summary.NeedsAttentionCnt++
		}
	}

	for _, summary := range summaries {
		finalizeAssigneeSummary(summary)
	}

	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
	defer cancel()

	tx, err := db.Pool.Begin(ctx)
	if err != nil {
		log.Fatalf("failed to begin transaction: %v", err)
	}
	defer tx.Rollback(ctx)

	updatedAnswers := 0
	updatedAssignees := 0
	for _, summary := range summaries {
		for _, update := range summary.QuestionUpdates {
			_, err := tx.Exec(ctx, `
				UPDATE student_answers
				SET is_correct = $4,
				    ai_feedback = $5,
				    review_needs_attention = $6,
				    review_issue_reason = $7,
				    review_correctness_score = $8,
				    review_understanding_score = $9,
				    review_question_score = $10,
				    review_confidence = $11,
				    review_tags = $12,
				    updated_at = NOW()
				WHERE assignment_id = $1
				  AND student_id = $2
				  AND question_id = $3
			`,
				update.AssignmentID,
				update.StudentID,
				update.QuestionID,
				update.IsCorrect,
				nullableString(update.AIFeedback),
				update.NeedsAttention,
				nullableString(update.IssueReason),
				update.CorrectnessScore,
				update.UnderstandingScore,
				update.QuestionScore,
				update.Confidence,
				update.ReviewTags,
			)
			if err != nil {
				log.Fatalf("failed to update student answer (%d/%d/%d): %v", update.AssignmentID, update.StudentID, update.QuestionID, err)
			}
			updatedAnswers++
		}

		var overall any
		var passStatus string
		if summary.OverallScore == nil {
			passStatus = "pending"
			overall = nil
		} else {
			overall = *summary.OverallScore
			if *summary.OverallScore >= 6.0 {
				passStatus = "pass"
			} else {
				passStatus = "no_pass"
			}
		}

		_, err := tx.Exec(ctx, `
			UPDATE assignment_assignees
			SET overall_score = $3,
			    ai_feedback = $4,
			    pass_status = $5,
			    pass_status_override = NULL
			WHERE assignment_id = $1
			  AND student_id = $2
		`, summary.AssignmentID, summary.StudentID, overall, nullableString(summary.AIFeedback), passStatus)
		if err != nil {
			log.Fatalf("failed to update assignment assignee (%d/%d): %v", summary.AssignmentID, summary.StudentID, err)
		}
		updatedAssignees++
	}

	if err := tx.Commit(ctx); err != nil {
		log.Fatalf("failed to commit transaction: %v", err)
	}

	log.Printf("historical review backfill complete: %d answers updated, %d assignees updated", updatedAnswers, updatedAssignees)
}

// buildQuestionReviewUpdate converts one raw student answer into the review
// values stored for the given assignee and question.
//
// Explicit review_* values on the row always win. Missing values are derived:
//   - correctness comes from is_correct, then _is_correct, and otherwise from
//     whether the AI reasoning does not start with "incorrect"
//     (case-insensitive, surrounding whitespace ignored);
//   - understanding and confidence come from correctness and the solve mode;
//   - needs-attention is set for incorrect answers or understanding < 0.72;
//   - the correctness score defaults to 1 for a correct answer and 0 otherwise;
//   - the question score defaults to the mean of correctness and understanding,
//     the same value the answer contributes to the assignee's overall score.
//
// The score defaults follow the actual outcome because a constant default
// would record a perfect score for an incorrect answer.
//
// The answered-at time uses _answered_at when it is positive, otherwise
// created_at when it is positive; both are read as Unix milliseconds.
// HasAnsweredAt is false when neither is available.
func buildQuestionReviewUpdate(assignee assigneeRecord, questionID int64, row studentAnswerRecord) questionReviewUpdate {
	var isCorrect bool
	switch {
	case row.IsCorrect != nil:
		isCorrect = *row.IsCorrect
	case row.UnderIsCorrect != nil:
		isCorrect = *row.UnderIsCorrect
	default:
		isCorrect = !strings.HasPrefix(strings.ToLower(strings.TrimSpace(row.AiReasoning)), "incorrect")
	}

	solveMode := firstNonEmptyString(row.SolveMode, row.UnderSolveMode)
	understanding := valueOrElse(row.ReviewUnderstandingScore, deriveUnderstandingScore(isCorrect, solveMode))
	confidence := valueOrElse(row.ReviewConfidence, deriveConfidenceScore(isCorrect, solveMode))
	needsAttention := boolOrElse(row.ReviewNeedsAttention, !isCorrect || understanding < 0.72)

	// NOTE(review): an issue reason is derived even for answers that do not
	// need attention — confirm whether that is intended.
	issueReason := stringOrElse(row.ReviewIssueReason, deriveIssueReason(row.AiReasoning, row.MisconceptionTag))
	aiFeedback := stringOrElse(row.AiFeedback, row.AiReasoning)
	reviewTags := sanitizeTags(row.ReviewTags, row.MisconceptionTag)

	questionContribution := (boolToFloat(isCorrect) + understanding) / 2
	correctness := valueOrElse(row.ReviewCorrectnessScore, boolToFloat(isCorrect))
	questionScore := valueOrElse(row.ReviewQuestionScore, questionContribution)

	var answeredAt time.Time
	var hasAnsweredAt bool
	switch {
	case row.AnsweredAt != nil && *row.AnsweredAt > 0:
		answeredAt = time.UnixMilli(*row.AnsweredAt).UTC()
		hasAnsweredAt = true
	case row.CreatedAt > 0:
		answeredAt = time.UnixMilli(row.CreatedAt).UTC()
		hasAnsweredAt = true
	}

	return questionReviewUpdate{
		AssignmentID:         assignee.AssignmentID,
		StudentID:            assignee.StudentID,
		QuestionID:           questionID,
		IsCorrect:            isCorrect,
		AIFeedback:           aiFeedback,
		NeedsAttention:       needsAttention,
		IssueReason:          issueReason,
		CorrectnessScore:     roundToThree(correctness),
		UnderstandingScore:   roundToThree(understanding),
		QuestionScore:        roundToThree(questionScore),
		Confidence:           roundToThree(confidence),
		ReviewTags:           reviewTags,
		Topic:                stringOrElse(row.QuestionTopic, "general"),
		QuestionContribution: questionContribution,
		AnsweredAt:           answeredAt,
		HasAnsweredAt:        hasAnsweredAt,
	}
}

// finalizeAssigneeSummary derives the aggregate fields of summary from its
// question updates: the overall score (mean question contribution scaled to
// 0-10 and rounded to two decimals), the next-step outcome and the feedback
// sentence. A summary without question updates is left untouched.
//
// Outcome thresholds: below 4.5 is "redo", below 6.0 is "support", anything
// else is "accept". The feedback names up to two topics with the lowest mean
// contribution; ties are broken alphabetically by topic name.
func finalizeAssigneeSummary(summary *assigneeSummary) {
	questionCount := len(summary.QuestionUpdates)
	if questionCount == 0 {
		return
	}

	// Collect contributions overall and per topic in answer order.
	contributionsByTopic := make(map[string][]float64)
	sum := 0.0
	for i := range summary.QuestionUpdates {
		q := &summary.QuestionUpdates[i]
		sum += q.QuestionContribution
		contributionsByTopic[q.Topic] = append(contributionsByTopic[q.Topic], q.QuestionContribution)
	}
	overall := roundToTwo(sum / float64(questionCount) * 10)
	summary.OverallScore = &overall

	type rankedTopic struct {
		label string
		mean  float64
	}
	ranking := make([]rankedTopic, 0, len(contributionsByTopic))
	for label, values := range contributionsByTopic {
		topicSum := 0.0
		for _, v := range values {
			topicSum += v
		}
		ranking = append(ranking, rankedTopic{label: label, mean: topicSum / float64(len(values))})
	}
	// Lowest mean first; the name tie-break makes the order independent of
	// map iteration order.
	sort.Slice(ranking, func(a, b int) bool {
		if ranking[a].mean != ranking[b].mean {
			return ranking[a].mean < ranking[b].mean
		}
		return ranking[a].label < ranking[b].label
	})
	if len(ranking) > 2 {
		ranking = ranking[:2]
	}

	names := make([]string, 0, len(ranking))
	for _, topic := range ranking {
		names = append(names, displayTopic(topic.label))
	}
	weakestTopicText := "general fluency"
	if len(names) > 0 {
		weakestTopicText = strings.Join(names, ", ")
	}

	switch {
	case overall < 4.5:
		summary.NextStepOutcome = "redo"
	case overall < 6.0:
		summary.NextStepOutcome = "support"
	default:
		summary.NextStepOutcome = "accept"
	}

	summary.AIFeedback = fmt.Sprintf(
		"Student completed %s with %d/%d correct responses. Overall score is %.2f/10. The weakest areas were %s. %d question(s) need extra attention.",
		summary.AssignmentName,
		summary.CorrectCount,
		questionCount,
		overall,
		weakestTopicText,
		summary.NeedsAttentionCnt,
	)
}

// deriveUnderstandingScore estimates how well the student understood a
// question from whether the answer was correct and how it was solved.
//
// solveMode is trimmed before matching. An empty or unrecognised mode is
// scored like "just_answer", so an unexpected mode string never collapses the
// score to zero.
func deriveUnderstandingScore(isCorrect bool, solveMode string) float64 {
	mode := strings.TrimSpace(solveMode)
	if isCorrect {
		switch mode {
		case "step_by_step":
			return 0.95
		case "handwritten":
			return 0.85
		case "solve_together":
			return 0.65
		default: // "just_answer", empty or unrecognised
			return 0.75
		}
	}
	switch mode {
	case "step_by_step":
		return 0.40
	case "handwritten":
		return 0.32
	case "solve_together":
		return 0.28
	default: // "just_answer", empty or unrecognised
		return 0.20
	}
}

// deriveConfidenceScore estimates the confidence of an automatic review from
// whether the answer was correct and how it was solved.
//
// solveMode is trimmed before matching. An empty or unrecognised mode is
// scored like "just_answer", so an unexpected mode string never collapses the
// confidence to zero.
func deriveConfidenceScore(isCorrect bool, solveMode string) float64 {
	mode := strings.TrimSpace(solveMode)
	if isCorrect {
		switch mode {
		case "step_by_step":
			return 0.82
		case "handwritten":
			return 0.78
		case "solve_together":
			return 0.62
		default: // "just_answer", empty or unrecognised
			return 0.90
		}
	}
	switch mode {
	case "step_by_step":
		return 0.55
	case "handwritten":
		return 0.60
	case "solve_together":
		return 0.50
	default: // "just_answer", empty or unrecognised
		return 0.72
	}
}

// deriveIssueReason produces the explanation stored as a review issue reason.
//
// A non-blank misconception tag takes precedence: known tags map to a fixed
// sentence and unknown tags are returned as-is (trimmed). Without a tag the
// trimmed AI reasoning is used, and a generic sentence is returned when that
// is empty as well.
func deriveIssueReason(aiReasoning string, misconception *string) string {
	tag := ""
	if misconception != nil {
		tag = strings.TrimSpace(*misconception)
	}

	switch tag {
	case "":
		// No usable tag: fall back to the reasoning text.
		if reasoning := strings.TrimSpace(aiReasoning); reasoning != "" {
			return reasoning
		}
		return "The answer shows incomplete understanding of the method."
	case "add_tops_add_bottoms":
		return "The student added the numerator and denominator directly instead of finding a common denominator."
	case "fraction_op_confusion":
		return "The student confused the fraction operation and did not apply the correct method."
	case "fraction_general_uncertainty":
		return "The student shows insecure understanding of equivalent or comparable fractions."
	case "place_value_misalignment":
		return "The student misread place value, causing digits to be aligned incorrectly."
	case "arithmetic_slip":
		return "The final answer is wrong, suggesting a careless arithmetic slip rather than a secure method."
	case "scaffolding_dependence":
		return "The student appears dependent on scaffolding and does not show secure independent understanding."
	case "word_problem_interpretation":
		return "The student did not translate the word problem into the correct calculation."
	default:
		return tag
	}
}

// defaultSolveMode returns value with surrounding whitespace removed, or
// "just_answer" when nothing remains after trimming.
func defaultSolveMode(value string) string {
	if trimmed := strings.TrimSpace(value); trimmed != "" {
		return trimmed
	}
	return "just_answer"
}

// displayTopic turns a snake_case topic key into a human-readable title:
// underscores become spaces, runs of whitespace collapse to one space and the
// first letter of every word is upper-cased. An empty or blank value yields "".
func displayTopic(value string) string {
	words := strings.Fields(strings.ReplaceAll(value, "_", " "))
	for i, word := range words {
		words[i] = capitalizeFirst(word)
	}
	return strings.Join(words, " ")
}

// capitalizeFirst upper-cases the first rune of word and leaves the rest
// unchanged. It splits on a rune boundary rather than at byte 1, so a leading
// multi-byte character is not cut in half.
func capitalizeFirst(word string) string {
	// Ranging over a string yields the byte offset of each rune; the first
	// offset greater than zero is where the second rune starts.
	for offset := range word {
		if offset > 0 {
			return strings.ToUpper(word[:offset]) + word[offset:]
		}
	}
	// Zero or one rune: the whole word is the "first letter".
	return strings.ToUpper(word)
}

// boolToFloat maps true to 1 and false to 0.
func boolToFloat(value bool) float64 {
	var result float64
	if value {
		result = 1
	}
	return result
}

// roundToTwo rounds value to two decimal places using math.Round, which
// rounds halves away from zero.
func roundToTwo(value float64) float64 {
	const scale = 100
	scaled := math.Round(value * scale)
	return scaled / scale
}

// roundToThree rounds value to three decimal places using math.Round, which
// rounds halves away from zero.
func roundToThree(value float64) float64 {
	const scale = 1000
	scaled := math.Round(value * scale)
	return scaled / scale
}

// readJSON reads the file at path and decodes its JSON content into a value
// of type T. On a read or decode error it returns the zero value of T
// together with the underlying error.
func readJSON[T any](path string) (T, error) {
	var decoded T
	raw, err := os.ReadFile(path)
	if err == nil {
		err = json.Unmarshal(raw, &decoded)
	}
	if err != nil {
		// Never hand back a partially decoded value.
		var zero T
		return zero, err
	}
	return decoded, nil
}

// resolveMockDataDir locates the directory holding the mock JSON files.
//
// A non-blank MOCK_DATA_DIR environment variable is returned (trimmed) without
// checking that it exists. Otherwise the first existing directory among
// ../Mock-Data, ./Mock-Data and ../../Mock-Data, relative to the working
// directory, is returned. An error is returned when none is found.
func resolveMockDataDir() (string, error) {
	override := strings.TrimSpace(os.Getenv("MOCK_DATA_DIR"))
	if override != "" {
		return override, nil
	}

	searchPaths := [...]string{
		filepath.Join("..", "Mock-Data"),
		filepath.Join(".", "Mock-Data"),
		filepath.Join("..", "..", "Mock-Data"),
	}
	for _, dir := range searchPaths {
		info, err := os.Stat(dir)
		if err != nil || !info.IsDir() {
			continue
		}
		return dir, nil
	}
	return "", errors.New("Mock-Data directory not found; set MOCK_DATA_DIR")
}

// valueOrElse dereferences value, or returns fallback when value is nil.
func valueOrElse(value *float64, fallback float64) float64 {
	if value == nil {
		return fallback
	}
	return *value
}

// boolOrElse dereferences value, or returns fallback when value is nil.
func boolOrElse(value *bool, fallback bool) bool {
	if value == nil {
		return fallback
	}
	return *value
}

// stringOrElse returns the trimmed *value when it is non-nil and not blank,
// otherwise the trimmed fallback.
func stringOrElse(value *string, fallback string) string {
	if value != nil {
		if trimmed := strings.TrimSpace(*value); trimmed != "" {
			return trimmed
		}
	}
	return strings.TrimSpace(fallback)
}

// firstNonEmptyString returns the first argument that is non-nil and not
// blank, with surrounding whitespace removed. It returns "" when there is no
// such argument.
func firstNonEmptyString(values ...*string) string {
	for i := range values {
		if values[i] == nil {
			continue
		}
		if trimmed := strings.TrimSpace(*values[i]); trimmed != "" {
			return trimmed
		}
	}
	return ""
}

// sanitizeTags returns the trimmed, non-blank, de-duplicated tags in their
// original order, followed by the trimmed misconception tag when it is
// non-blank and not already present. The result is never nil.
func sanitizeTags(tags []string, misconception *string) []string {
	cleaned := make([]string, 0, len(tags)+1)
	present := make(map[string]struct{}, len(tags)+1)

	// appendUnique adds the trimmed form of raw unless it is blank or a repeat.
	appendUnique := func(raw string) {
		candidate := strings.TrimSpace(raw)
		if candidate == "" {
			return
		}
		if _, duplicate := present[candidate]; duplicate {
			return
		}
		present[candidate] = struct{}{}
		cleaned = append(cleaned, candidate)
	}

	for _, raw := range tags {
		appendUnique(raw)
	}
	if misconception != nil {
		appendUnique(*misconception)
	}
	return cleaned
}

// nullableString prepares a string for use as an SQL parameter: blank input
// becomes an untyped nil, anything else is passed as the trimmed string.
func nullableString(value string) any {
	trimmed := strings.TrimSpace(value)
	if trimmed == "" {
		return nil
	}
	return trimmed
}