""" Mock pupil-history dataset generator for the BoostAI "Learning Path Agent" hackathon challenge. Outputs JSON files under the same directory, mirroring the production SQLAlchemy schema in elevenplus-backend/src/app/models/. The generator is deterministic (seeded RNG) so re-runs produce identical output. Run: python3 generate.py Outputs: classroom.json, students.json, question_bank.json, assignments.json, assignment_questions.json, assignment_assignees.json, student_answers.json, activity_logs.json, dataset.json """ from __future__ import annotations import json import os import random from datetime import datetime, timedelta, timezone from pathlib import Path # Reference date — keep in sync with the hackathon brief. TODAY = datetime(2026, 5, 1, 9, 0, 0, tzinfo=timezone.utc) OUT_DIR = Path(__file__).parent RNG = random.Random(20260501) def ms(dt: datetime) -> int: return int(dt.timestamp() * 1000) def days_ago(n: float, hour: int = 10, minute: int = 0) -> datetime: return (TODAY - timedelta(days=n)).replace(hour=hour, minute=minute, second=0, microsecond=0) # --------------------------------------------------------------------------- # Classroom + Tutor + Students # --------------------------------------------------------------------------- TUTOR = { "id": 100, "fullname": "Sarah Johnson", "email": "sarah.johnson@boostai.example", "username": "sjohnson", "role": "tutor", "active": True, "is_test": False, "is_deleted": False, "created_at": ms(days_ago(180)), "updated_at": ms(days_ago(180)), } CLASSROOM = { "id": 500, "name": "Year 6 — Maths Set 1", "organization_id": 1, "tutor_id": TUTOR["id"], "invite_code": "Y6MATHS1", "target_level": 6, "archived": False, "hide_just_answer": False, "is_deleted": False, "created_at": ms(days_ago(60)), "updated_at": ms(days_ago(60)), } # 12 students. _persona is a hackathon annotation (not in the production # schema) — used to drive answer generation below and to document expected # agent output. 
# Strip the underscore-prefixed fields if seeding the real DB.
STUDENTS_RAW = [
    # (student id, full name, persona driving answer generation)
    (201, "Aisha Khan", "fraction_inversion"),
    (202, "Ben Carter", "place_value_gaps"),
    (203, "Chen Wei", "rushed_careless"),
    (204, "Daniela Rossi", "solve_together_dependent"),
    (205, "Elif Demir", "word_problem_weak"),
    (206, "Felix Brown", "stable_strong"),
    (207, "Grace Park", "stable_strong"),
    (208, "Harry Singh", "stable_mid"),
    (209, "Isla Nakamura", "stable_mid"),
    (210, "Jaden Williams", "stable_mid"),
    (211, "Kira Patel", "stable_weak"),
    (212, "Liam O'Connor", "stable_weak"),
]


def _student_row(student_id: int, fullname: str, persona: str, stamp_ms: int) -> dict:
    """Build one student record; email and username derive from the first name."""
    given_name, *_ = fullname.split()
    handle = given_name.lower().replace("'", "")
    return {
        "id": student_id,
        "fullname": fullname,
        "email": f"{handle}.{student_id}@boostai.example",
        "username": f"{handle}{student_id}",
        "role": "student",
        "active": True,
        "is_test": False,
        "is_deleted": False,
        "created_at": stamp_ms,
        "updated_at": stamp_ms,
        "_persona": persona,
    }


STUDENTS = []
CLASSROOM_STUDENT_RS = []
for sid, fullname, persona in STUDENTS_RAW:
    # Every student and classroom membership is stamped 55 days before TODAY.
    enrolled_ms = ms(days_ago(55))
    STUDENTS.append(_student_row(sid, fullname, persona, enrolled_ms))
    CLASSROOM_STUDENT_RS.append({
        "id": 600 + sid,
        "classroom_id": CLASSROOM["id"],
        "student_id": sid,
        "created_at": enrolled_ms,
    })

# ---------------------------------------------------------------------------
# Question Bank — Maths, 11+
# Fields mirror question_bank.py.
# _wrong_answers: hackathon helper — maps a misconception persona to the
#   answer that persona typically produces. Not in production schema.
# ---------------------------------------------------------------------------


def _bank_entry(qid, topic, sub_topic, tag, difficulty, question_text,
                correct_answer, wrong_answers=None):
    """Build one question-bank record.

    ``_wrong_answers`` is only set here when a persona map is supplied, so it
    keeps its position right after ``correct_answer``; entries without one
    receive an empty map at the end when the defaults are filled in below.
    """
    entry = {
        "id": qid,
        "topic": topic,
        "sub_topic": sub_topic,
        "tag": tag,
        "difficulty": difficulty,
        "question_text": question_text,
        "correct_answer": correct_answer,
    }
    if wrong_answers is not None:
        entry["_wrong_answers"] = wrong_answers
    return entry


QUESTION_BANK = [
    # ---- Place Value (tens, hundreds, thousands, decimals) ----
    _bank_entry(1001, "Place Value", "Multi-digit numbers", None, "EASY",
                "What is the value of the digit 7 in the number 4,732?", "700",
                {"place_value_gaps": "70"}),
    _bank_entry(1002, "Place Value", "Multi-digit numbers", None, "MEDIUM",
                "Round 24,587 to the nearest thousand.", "25000",
                {"place_value_gaps": "24000"}),
    _bank_entry(1003, "Place Value", "Decimals", None, "MEDIUM",
                "Write 0.07 as a fraction in its simplest form.", "7/100",
                {"place_value_gaps": "7/10"}),
    _bank_entry(1004, "Place Value", "Decimals", None, "HARD",
                "What is 0.3 + 0.07?", "0.37",
                {"place_value_gaps": "0.10"}),

    # ---- Arithmetic ----
    _bank_entry(1101, "Arithmetic", "Addition", None, "EASY",
                "Calculate 246 + 137.", "383",
                {"rushed_careless": "373"}),
    _bank_entry(1102, "Arithmetic", "Subtraction", None, "MEDIUM",
                "Calculate 503 - 47.", "456",
                {"place_value_gaps": "544", "rushed_careless": "466"}),
    _bank_entry(1103, "Arithmetic", "Subtraction", None, "HARD",
                "Calculate 4,002 - 1,375.", "2627",
                {"place_value_gaps": "3737", "rushed_careless": "2617"}),
    _bank_entry(1104, "Arithmetic", "Multiplication", None, "MEDIUM",
                "Calculate 28 x 7.", "196",
                {"rushed_careless": "186"}),
    _bank_entry(1105, "Arithmetic", "Division", None, "MEDIUM",
                "Calculate 144 / 6.", "24",
                {"rushed_careless": "26"}),
    _bank_entry(1106, "Arithmetic", "Multiplication", None, "HARD",
                "Calculate 156 x 24.", "3744",
                {"rushed_careless": "3724", "place_value_gaps": "374"}),

    # ---- Negative Numbers ----
    _bank_entry(1201, "Negative Numbers", "Addition", None, "EASY",
                "What is -5 + 8?", "3"),
    _bank_entry(1202, "Negative Numbers", "Subtraction", None, "MEDIUM",
                "What is 4 - 9?", "-5"),
    _bank_entry(1203, "Negative Numbers", "Mixed", None, "HARD",
                "What is -7 - (-3)?", "-4"),

    # ---- BIDMAS ----
    _bank_entry(1301, "BIDMAS", None, None, "EASY",
                "Calculate 5 + 3 x 2.", "11"),
    _bank_entry(1302, "BIDMAS", None, None, "MEDIUM",
                "Calculate (8 - 3) x 4 + 2.", "22"),
    _bank_entry(1303, "BIDMAS", None, None, "HARD",
                "Calculate 24 / (2 + 4) + 3 x 5.", "19",
                {"rushed_careless": "17"}),

    # ---- Fractions: Equivalent ----
    _bank_entry(1401, "Fractions", "Equivalent", None, "EASY",
                "Which fraction is equivalent to 2/4?", "1/2"),
    _bank_entry(1402, "Fractions", "Equivalent", None, "MEDIUM",
                "Simplify 12/18 to its lowest terms.", "2/3"),
    _bank_entry(1403, "Fractions", "Equivalent", None, "HARD",
                "Which is larger: 3/5 or 5/8? Give your answer.", "5/8"),

    # ---- Fractions: Add/Subtract (CORE for fraction_inversion persona) ----
    _bank_entry(1411, "Fractions", "Add", None, "EASY",
                "What is 1/2 + 1/3?", "5/6",
                {"fraction_inversion": "2/5"}),
    _bank_entry(1412, "Fractions", "Add", None, "EASY",
                "What is 1/4 + 1/2?", "3/4",
                {"fraction_inversion": "2/6"}),
    _bank_entry(1413, "Fractions", "Add", None, "MEDIUM",
                "What is 2/5 + 1/3?", "11/15",
                {"fraction_inversion": "3/8"}),
    _bank_entry(1414, "Fractions", "Add", None, "MEDIUM",
                "What is 3/4 + 1/6?", "11/12",
                {"fraction_inversion": "4/10"}),
    _bank_entry(1415, "Fractions", "Subtract", None, "MEDIUM",
                "What is 5/6 - 1/3?", "1/2",
                {"fraction_inversion": "4/3"}),
    _bank_entry(1416, "Fractions", "Add", None, "HARD",
                "What is 7/12 + 5/8?", "29/24",
                {"fraction_inversion": "12/20"}),

    # ---- Fractions: Multiply ----
    _bank_entry(1421, "Fractions", "Multiply", None, "EASY",
                "What is 1/2 x 1/3?", "1/6",
                {"fraction_inversion": "2/3"}),
    _bank_entry(1422, "Fractions", "Multiply", None, "MEDIUM",
                "What is 2/3 x 3/4?", "1/2",
                {"fraction_inversion": "5/7"}),
    _bank_entry(1423, "Fractions", "Multiply", None, "HARD",
                "What is 4/5 of 35?", "28",
                {"fraction_inversion": "20"}),

    # ---- Algebra: Simple Equations ----
    _bank_entry(1501, "Algebra", "Simple Equations", None, "EASY",
                "Solve x + 7 = 12.", "5"),
    _bank_entry(1502, "Algebra", "Simple Equations", None, "MEDIUM",
                "Solve 3x - 4 = 17.", "7",
                {"rushed_careless": "8"}),
    _bank_entry(1503, "Algebra", "Simple Equations", None, "HARD",
                "Solve 2(x + 3) = 18.", "6"),

    # ---- Algebra: Sequences ----
    _bank_entry(1511, "Algebra", "Sequences", None, "EASY",
                "What is the next term: 2, 5, 8, 11, ___ ?", "14"),
    _bank_entry(1512, "Algebra", "Sequences", None, "MEDIUM",
                "Find the 10th term of the sequence 4, 7, 10, 13, ...", "31"),
    _bank_entry(1513, "Algebra", "Sequences", None, "HARD",
                "What is the nth term of the sequence 5, 8, 11, 14, ...?", "3n+2"),

    # ---- Geometry: Area & Perimeter ----
    _bank_entry(1601, "Geometry", "Perimeter", None, "EASY",
                "What is the perimeter of a square with side length 7 cm?", "28"),
    _bank_entry(1602, "Geometry", "Area", None, "MEDIUM",
                "What is the area of a rectangle 8 cm by 5 cm?", "40"),
    _bank_entry(1603, "Geometry", "Area", None, "HARD",
                "A right-angled triangle has base 6 cm and height 9 cm. What is its area?", "27"),

    # ---- Geometry: Angles ----
    _bank_entry(1611, "Geometry", "Angles", None, "EASY",
                "Two angles on a straight line are 110° and x. What is x?", "70"),
    _bank_entry(1612, "Geometry", "Angles", None, "MEDIUM",
                "The angles of a triangle are 45°, 60° and x. What is x?", "75"),
    _bank_entry(1613, "Geometry", "Angles", None, "HARD",
                "What is the sum of interior angles of a hexagon?", "720"),

    # ---- Data: Mean / Median / Mode ----
    _bank_entry(1701, "Data", "Mean", None, "EASY",
                "Find the mean of 4, 6, 8, 10, 12.", "8"),
    _bank_entry(1702, "Data", "Median", None, "MEDIUM",
                "Find the median of 3, 7, 2, 9, 5.", "5"),
    _bank_entry(1703, "Data", "Mode", None, "EASY",
                "Find the mode of 2, 3, 3, 5, 7, 7, 7, 8.", "7"),

    # ---- Data: Probability ----
    _bank_entry(1711, "Data", "Probability", None, "MEDIUM",
                "A bag has 3 red and 5 blue marbles. What is the probability of red?", "3/8"),
    _bank_entry(1712, "Data", "Probability", None, "HARD",
                "A fair die is rolled. What is the probability of an even number greater than 2?", "1/3"),

    # ---- Word problems (cross-topic) ----
    _bank_entry(1801, "Arithmetic", "Word problem", "word_problem", "EASY",
                "Tom has 24 apples. He gives 9 to his friend. How many does he have left?", "15"),
    _bank_entry(1802, "Fractions", "Word problem", "word_problem", "MEDIUM",
                "A pizza is cut into 8 slices. Sara eats 1/4 and Tom eats 3/8. What fraction is left?", "3/8",
                {"word_problem_weak": "1/2", "fraction_inversion": "4/12"}),
    _bank_entry(1803, "Arithmetic", "Word problem", "word_problem", "MEDIUM",
                "A train ticket costs £8.50. How much do 6 tickets cost?", "51",
                {"word_problem_weak": "48", "rushed_careless": "50"}),
    _bank_entry(1804, "Algebra", "Word problem", "word_problem", "HARD",
                "Three consecutive numbers add up to 72. What is the smallest number?", "23",
                {"word_problem_weak": "24"}),
    _bank_entry(1805, "Geometry", "Word problem", "word_problem", "HARD",
                "A rectangular garden is 12 m long and 4 m shorter than it is long. What is its area?", "96",
                {"word_problem_weak": "48"}),
]

# Standard fields shared by every bank entry, in the order they are appended.
# All values are immutable, so sharing one object across entries is safe.
_BANK_STAMP_MS = ms(days_ago(40))
_BANK_DEFAULTS = (
    ("category", "Math"),
    ("year_level", "Year 6"),
    ("source", "BOOST"),
    ("source_description", None),
    ("teacher_id", TUTOR["id"]),
    ("maximum_marks", 1),
    ("rubric", None),
    ("step_by_step_solution", None),
    ("image_url", None),
    ("is_deleted", False),
    ("created_at", _BANK_STAMP_MS),
    ("updated_at", _BANK_STAMP_MS),
)

# Fill in remaining standard fields for every bank entry.
for q in QUESTION_BANK:
    for _field, _default in _BANK_DEFAULTS:
        q.setdefault(_field, _default)
    # A fresh dict per entry: the persona map must never be shared.
    q.setdefault("_wrong_answers", {})

# ---------------------------------------------------------------------------
# Assignments + Assignment Questions
#
# 8 assignments. Distribution: 5 CLOSED (past), 2 PUBLISHED (in-flight),
# 1 DRAFT (future).
# The deadline-pressure assignment is a Fractions/Add
# assignment due in 5 days — drives the bonus Early Warning topic correlation.
# ---------------------------------------------------------------------------

ASSIGNMENT_DEFS = [
    # (id, name, focus_topic, due_offset_days, status, question_bank_ids)
    (3001, "HW1 — Place Value Warmup", "Place Value", -28, "CLOSED",
     [1001, 1002, 1003, 1101, 1102, 1301, 1401, 1701]),
    (3002, "HW2 — Arithmetic Practice", "Arithmetic", -22, "CLOSED",
     [1101, 1102, 1103, 1104, 1105, 1106, 1801, 1803]),
    (3003, "HW3 — Fractions Foundations", "Fractions", -16, "CLOSED",
     [1401, 1402, 1411, 1412, 1413, 1421, 1422, 1802]),
    (3004, "HW4 — Negatives & BIDMAS", "BIDMAS", -10, "CLOSED",
     [1201, 1202, 1203, 1301, 1302, 1303, 1502, 1701]),
    (3005, "HW5 — Geometry Basics", "Geometry", -6, "CLOSED",
     [1601, 1602, 1603, 1611, 1612, 1613, 1804, 1805]),
    (3006, "HW6 — Algebra & Sequences", "Algebra", 2, "PUBLISHED",
     [1501, 1502, 1503, 1511, 1512, 1513, 1804, 1702]),
    # The deadline-pressure assignment — bonus Early Warning anchors here.
    (3007, "HW7 — Adding Fractions (test prep)", "Fractions", 5, "PUBLISHED",
     [1411, 1412, 1413, 1414, 1415, 1416, 1802, 1422]),
    (3008, "HW8 — Mixed Revision", "Mixed", 12, "DRAFT",
     [1004, 1106, 1303, 1416, 1503, 1613, 1712, 1805]),
]

ASSIGNMENTS = []
ASSIGNMENT_QUESTIONS = []
for aid, name, topic, offset, status, qb_ids in ASSIGNMENT_DEFS:
    # Created ~1 week before due, but never more than 45 days before TODAY.
    # NOTE(review): for HW8 (due offset +12) this gives an offset of +5, i.e.
    # a created_at five days after TODAY — confirm that is intended for DRAFTs.
    created_offset = max(offset - 7, -45)
    created_ms = ms(days_ago(-created_offset))

    ASSIGNMENTS.append({
        "id": aid,
        "name": name,
        "teacher_id": TUTOR["id"],
        "topic": topic,
        # Due at 23:59 on the day `offset` days from TODAY (negative = past).
        "due_date": ms(days_ago(-offset, hour=23, minute=59)),
        "status": status,
        "maximum_marks": len(qb_ids),
        "is_deleted": False,
        "created_at": created_ms,
        "updated_at": created_ms,
    })

    # One row per question; ids are assignment id * 100 + 1-based position.
    ASSIGNMENT_QUESTIONS.extend(
        {
            "id": aid * 100 + order,
            "assignment_id": aid,
            "question_bank_id": qb_id,
            "question_order": order,
            "maximum_marks": 1,
            "rubric": None,
            "created_at": created_ms,
        }
        for order, qb_id in enumerate(qb_ids, start=1)
    )

# ---------------------------------------------------------------------------
# Assignment Assignees (per student × per assignment) + Student Answers
# ---------------------------------------------------------------------------

# Lookup tables keyed by primary id.
QB_BY_ID = {entry["id"]: entry for entry in QUESTION_BANK}
AQ_BY_ID = {row["id"]: row for row in ASSIGNMENT_QUESTIONS}


def assignee_status_for(assignment_status: str, persona: str, aid: int) -> str:
    """Map an assignment status to the assignee status it implies.

    DRAFT -> NOT_STARTED, PUBLISHED -> IN_PROGRESS, anything else ->
    SUBMITTED. ``persona`` and ``aid`` are accepted but not consulted.
    """
    implied = {"DRAFT": "NOT_STARTED", "PUBLISHED": "IN_PROGRESS"}
    return implied.get(assignment_status, "SUBMITTED")


# --- Persona-driven correctness/solve-mode generation -----------------------
#
# Each persona is a branch of gen_answer() that, given the question and the
# assignment's "week index" (0 = oldest, higher = more recent), decides the
# correctness probability, solve mode and time on task for one answer.
# All randomness flows through RNG (seeded), so output is deterministic.
SOLVE_MODES = ["just_answer", "step_by_step", "solve_together", "handwritten"]

# First-person explanation appended to the working steps of an incorrect
# step-by-step answer, keyed by misconception tag.
_MISCONCEPTION_EXPLANATIONS = {
    "add_tops_add_bottoms": "I added the numerators and denominators directly instead of finding a common denominator.",
    "fraction_op_confusion": "I mixed up the fraction rule and used the wrong operation method.",
    "fraction_general_uncertainty": "I was unsure which fraction method to use, so my working was inconsistent.",
    "place_value_misalignment": "I lined the digits up incorrectly and misread the place value.",
    "arithmetic_slip": "My method was close, but I made a careless arithmetic slip in the calculation.",
    "scaffolding_dependence": "I could start the method, but I was not secure enough to finish it independently.",
    "word_problem_interpretation": "I misunderstood what the word problem was asking me to calculate.",
}

# Reviewer-voice issue reason for an incorrect answer, keyed by misconception tag.
_ISSUE_REASONS = {
    "add_tops_add_bottoms": "The student added the numerator and denominator directly instead of finding a common denominator.",
    "fraction_op_confusion": "The student confused the fraction operation and did not apply the correct method.",
    "fraction_general_uncertainty": "The student shows insecure understanding of equivalent or comparable fractions.",
    "place_value_misalignment": "The student misread place value, causing digits to be aligned incorrectly.",
    "arithmetic_slip": "The final answer is wrong, suggesting a careless arithmetic slip rather than a secure method.",
    "scaffolding_dependence": "The student appears dependent on scaffolding and does not show secure independent understanding.",
    "word_problem_interpretation": "The student did not translate the word problem into the correct calculation.",
}


def base_time_for_difficulty(d: str) -> int:
    """Return the baseline time on task in seconds for difficulty *d*.

    Raises:
        KeyError: if *d* is not "EASY", "MEDIUM" or "HARD".
    """
    return {"EASY": 60, "MEDIUM": 100, "HARD": 160}[d]


def jitter_time(base: int) -> int:
    """Return *base* shifted by a seeded random -25..+35 s, floored at 15 s."""
    return max(15, int(base + RNG.randint(-25, 35)))


def pick_mode_default(persona: str, week_idx: int) -> str:
    """Draw a solve mode from the default distribution.

    70% just_answer, 20% step_by_step, 7% solve_together, 3% handwritten.
    Consumes exactly one RNG draw; ``persona`` and ``week_idx`` are accepted
    but do not affect the result.
    """
    r = RNG.random()
    if r < 0.70:
        return "just_answer"
    if r < 0.90:
        return "step_by_step"
    if r < 0.97:
        return "solve_together"
    return "handwritten"


def answer_for_persona(q: dict, persona: str, force_correct: bool) -> tuple[bool, str]:
    """Return ``(is_correct, answer_text)`` for question *q*.

    When *force_correct* is true the bank's correct answer is returned.
    Otherwise the persona's entry in ``q["_wrong_answers"]`` is used; if the
    persona has none, the wrong answer is the correct answer with "?" appended.
    """
    if force_correct:
        return True, q["correct_answer"]
    wrong_map = q.get("_wrong_answers", {}) or {}
    if persona in wrong_map:
        return False, wrong_map[persona]
    # Generic wrong answer.
    return False, q["correct_answer"] + "?"


def gen_answer(student: dict, assignment: dict, aq: dict, q: dict, week_idx: int, total_weeks: int):
    """Simulate one student's answer to one question.

    Args:
        student: student record; only ``_persona`` is read.
        assignment, aq, total_weeks: accepted for call-site compatibility;
            not consulted.
        q: question-bank record (topic, sub_topic, tag, difficulty, answers).
        week_idx: position of the assignment in due-date order (0 = oldest).

    Returns:
        A dict with ``is_correct``, ``answer_text``, ``solve_mode``,
        ``time_on_task_seconds`` and ``misconception_tag`` (``None`` when the
        answer is correct or the persona has no tag for this question).

    The order of RNG draws (default mode, persona-specific draws, correctness,
    time jitter) determines the generated dataset and must not be reordered.
    """
    persona = student["_persona"]
    difficulty = q["difficulty"]
    base_time = base_time_for_difficulty(difficulty)

    is_word = q.get("tag") == "word_problem"
    is_fraction_op = q["topic"] == "Fractions" and q["sub_topic"] in ("Add", "Subtract", "Multiply")
    # Hard subtraction/multiplication also counts as place-value work.
    is_place_value = q["topic"] == "Place Value" or (
        q["topic"] == "Arithmetic"
        and q["sub_topic"] in ("Subtraction", "Multiplication")
        and difficulty == "HARD"
    )

    # Default: stable_mid baseline. The mode draw always happens first, even
    # for personas that override the mode afterwards.
    p_correct = 0.65
    solve_mode = pick_mode_default(persona, week_idx)
    misconception_candidate = None

    if persona == "fraction_inversion":
        if is_fraction_op:
            # Sharp misconception: very low on fraction ops, declining.
            p_correct = max(0.03, 0.20 - 0.03 * week_idx)
            misconception_candidate = (
                "add_tops_add_bottoms"
                if q["sub_topic"] in ("Add", "Subtract")
                else "fraction_op_confusion"
            )
        elif q["topic"] == "Fractions":
            # Equivalent fractions etc: still shaky.
            p_correct = 0.25
            misconception_candidate = "fraction_general_uncertainty"
        else:
            p_correct = 0.78
    elif persona == "place_value_gaps":
        if is_place_value:
            p_correct = 0.25
            misconception_candidate = "place_value_misalignment"
        else:
            p_correct = 0.65
    elif persona == "rushed_careless":
        # Right method when forced to slow down (step_by_step), wrong when
        # rushed: 40% correct in just_answer, 90% in step_by_step.
        # Solve mode is mostly just_answer (85%).
        r = RNG.random()
        solve_mode = "just_answer" if r < 0.85 else "step_by_step"
        p_correct = 0.90 if solve_mode == "step_by_step" else 0.40
        misconception_candidate = "arithmetic_slip"
        # Time on task decays: 0.9 * base in week 0, floored at 0.4 * base.
        t_factor = max(0.4, 0.9 - 0.12 * week_idx)
        base_time = int(base_time * t_factor)
    elif persona == "solve_together_dependent":
        # solve_together usage rises sharply over time (8% in week 0, capped
        # at 92%), while independent accuracy is low and degrading — the
        # student is leaning on scaffolding more and more.
        st_prob = min(0.92, 0.08 + 0.18 * week_idx)
        r = RNG.random()
        if r < st_prob:
            solve_mode = "solve_together"
            p_correct = 0.85
        else:
            solve_mode = "just_answer" if RNG.random() < 0.7 else "step_by_step"
            p_correct = max(0.20, 0.55 - 0.06 * week_idx)
        misconception_candidate = "scaffolding_dependence"
    elif persona == "word_problem_weak":
        if is_word:
            p_correct = 0.20
            misconception_candidate = "word_problem_interpretation"
        else:
            p_correct = 0.78
    elif persona == "stable_strong":
        p_correct = 0.88 if difficulty != "HARD" else 0.78
    elif persona == "stable_mid":
        p_correct = 0.65 if difficulty != "HARD" else 0.50
    elif persona == "stable_weak":
        p_correct = 0.55 if difficulty != "HARD" else 0.40

    # Decide correctness, then pick the matching answer text.
    is_correct = RNG.random() < p_correct
    is_correct, answer_text = answer_for_persona(q, persona, force_correct=is_correct)
    # The misconception tag is only reported on incorrect answers.
    misconception_tag = None if is_correct else misconception_candidate

    return {
        "is_correct": is_correct,
        "answer_text": answer_text,
        "solve_mode": solve_mode,
        "time_on_task_seconds": jitter_time(base_time),
        "misconception_tag": misconception_tag,
    }


def derive_working_steps(q: dict, ans: dict):
    """Return a first-person "working steps" sentence for a generated answer.

    The wording depends on the solve mode and on correctness; an incorrect
    step-by-step answer additionally gets an explanation chosen by its
    misconception tag (with a generic fallback).
    """
    solve_mode = ans["solve_mode"]
    correct = bool(ans["is_correct"])
    topic = q["topic"]
    # Questions without a sub-topic fall back to the topic name.
    sub_topic = q.get("sub_topic") or topic
    expected = q["correct_answer"]
    given = ans["answer_text"]
    misconception_tag = ans.get("misconception_tag")

    if solve_mode == "just_answer":
        return "I solved it mentally and wrote the final answer only."

    if solve_mode == "solve_together":
        if correct:
            return f"I followed guided steps for {topic.lower()} and reached {given}."
        return f"I needed guided help on {sub_topic.lower()}, but I still ended with {given} instead of {expected}."

    if solve_mode == "handwritten":
        if correct:
            return f"I worked it out on paper using {sub_topic.lower()} and checked that the final answer was {given}."
        return f"I worked it out on paper for {sub_topic.lower()}, but my final answer was {given} instead of {expected}."

    # Remaining case: step_by_step (or any unrecognised mode).
    if correct:
        return f"I used a step-by-step method for {sub_topic.lower()} and got the correct answer {given}."

    explanation = _MISCONCEPTION_EXPLANATIONS.get(
        misconception_tag,
        f"My method did not lead to the expected answer {expected}.",
    )
    return f"I tried a step-by-step method for {sub_topic.lower()}, but I got {given}. {explanation}"


def derive_review_fields(q: dict, ans: dict):
    """Derive the AI-review columns for one generated answer.

    Returns a dict with ``review_correctness_score``, ``review_question_score``,
    ``review_understanding_score``, ``review_confidence``,
    ``review_needs_attention``, ``review_issue_reason``, ``review_tags`` and
    ``ai_feedback_review``.

    The correctness score is 1.0 for a correct answer and 0.0 otherwise, and
    the question score is the mean of correctness and understanding — the same
    per-question formula build_assignment_review_summary() applies. Both were
    previously hard-coded to 1.0, which contradicted incorrect answers.
    """
    correct = bool(ans["is_correct"])
    solve_mode = ans["solve_mode"]
    misconception_tag = ans["misconception_tag"]
    expected = q["correct_answer"]

    if correct:
        understanding_score = {
            "step_by_step": 0.95,
            "handwritten": 0.85,
            "just_answer": 0.75,
            "solve_together": 0.65,
        }.get(solve_mode, 0.75)
        confidence = {
            "step_by_step": 0.82,
            "handwritten": 0.78,
            "just_answer": 0.9,
            "solve_together": 0.62,
        }.get(solve_mode, 0.78)
        # With the table above only solve_together falls below the threshold.
        needs_attention = understanding_score < 0.72
        issue_reason = (
            "Correct answer, but there is limited evidence that the method is secure."
            if needs_attention
            else None
        )
        if solve_mode == "step_by_step":
            ai_feedback_review = "Correct answer with clear method evidence."
        elif needs_attention:
            ai_feedback_review = "Correct answer, but understanding evidence is lighter because the response is brief."
        else:
            ai_feedback_review = "Correct answer with secure understanding shown."
    else:
        understanding_score = {
            "step_by_step": 0.4,
            "handwritten": 0.32,
            "just_answer": 0.2,
            "solve_together": 0.28,
        }.get(solve_mode, 0.25)
        confidence = {
            "step_by_step": 0.55,
            "handwritten": 0.6,
            "just_answer": 0.72,
            "solve_together": 0.5,
        }.get(solve_mode, 0.6)
        # Every incorrect answer is flagged for attention.
        needs_attention = True
        issue_reason = _ISSUE_REASONS.get(
            misconception_tag,
            f"The answer does not match the correct answer ({expected}), showing an incomplete understanding of the method.",
        )
        ai_feedback_review = f"Incorrect answer compared with the expected answer {expected}. {issue_reason}"

    correctness_score = 1.0 if correct else 0.0
    question_score = (correctness_score + understanding_score) / 2

    return {
        "review_correctness_score": correctness_score,
        "review_question_score": round(question_score, 3),
        "review_understanding_score": round(understanding_score, 3),
        "review_confidence": round(confidence, 3),
        "review_needs_attention": needs_attention,
        "review_issue_reason": issue_reason,
        "review_tags": [misconception_tag] if misconception_tag else [],
        "ai_feedback_review": ai_feedback_review,
    }


def build_assignment_review_summary(student_name: str, assignment_name: str, reviews: list[dict]):
    """Summarise one student's answers on one assignment.

    Args:
        student_name, assignment_name: used only in the feedback sentence.
        reviews: one dict per answer with ``topic``, ``is_correct``,
            ``review_understanding_score`` and ``review_needs_attention``.

    Returns:
        ``(overall_score, ai_feedback, next_step_outcome)``. The score is the
        mean per-question score scaled to 0-10 and rounded to 2 places; the
        outcome is "accept" at >= 6.0, "support" at >= 4.5, else "redo".
        With no reviews the result is ``(None, <placeholder text>, None)``.
    """
    if not reviews:
        return None, "No submitted work is available for this assignment yet.", None

    per_question_scores = []
    scores_by_topic: dict[str, list[float]] = {}
    attention_count = 0
    correct_count = 0

    for review in reviews:
        correctness_value = 1.0 if review["is_correct"] else 0.0
        understanding_value = review["review_understanding_score"]
        question_score = (correctness_value + understanding_value) / 2
        per_question_scores.append(question_score)
        scores_by_topic.setdefault(review["topic"], []).append(question_score)
        if review["review_needs_attention"]:
            attention_count += 1
        if review["is_correct"]:
            correct_count += 1

    overall_score = round(sum(per_question_scores) / len(per_question_scores) * 10, 2)

    # Two lowest-scoring topics; ties are broken alphabetically by topic name.
    weakest_topics = sorted(
        ((topic, sum(values) / len(values)) for topic, values in scores_by_topic.items()),
        key=lambda item: (item[1], item[0]),
    )[:2]
    weakest_topic_text = ", ".join(topic for topic, _ in weakest_topics) if weakest_topics else "general fluency"

    if overall_score >= 6.0:
        next_step_outcome = "accept"
    elif overall_score >= 4.5:
        next_step_outcome = "support"
    else:
        next_step_outcome = "redo"

    ai_feedback = (
        f"{student_name} completed {assignment_name} with {correct_count}/{len(reviews)} correct responses. "
        f"Overall score is {overall_score}/10. "
        f"The weakest areas were {weakest_topic_text}. "
        f"{attention_count} question(s) need extra attention."
    )
    return overall_score, ai_feedback, next_step_outcome


# Order assignments oldest -> newest for week_idx threading.
# Walk the assignments in due-date order so that week_idx (passed to
# gen_answer below) follows the chronology of the term.
ASSIGNMENT_DEFS_SORTED = sorted(ASSIGNMENT_DEFS, key=lambda a: a[3])  # by due_offset
weeks_total = len(ASSIGNMENT_DEFS_SORTED)

ASSIGNMENT_ASSIGNEES = []
STUDENT_ANSWERS = []
ACTIVITY_LOGS = []
assignee_id_seq = 4000
answer_id_seq = 50000
log_id_seq = 70000

# Students who "fall behind" on attempts (bonus signal). Both sets are
# consulted by the same rule below: a listed student never starts an
# assignment that is not CLOSED.
LOW_ATTEMPT_RATE_STUDENTS = {203}  # Chen Wei: persona=rushed_careless + 8 days no activity
SKIP_RECENT_ASSIGNMENTS_STUDENTS = {203}  # student 203 hasn't attempted recent assignments


def _not_started_assignee(assignee_id, assignment_id, student_id, due_offset) -> dict:
    """Build the assignee row for a student who has not opened the assignment.

    The row carries the same keys as a started row (including the review
    summary fields, set to None) so every record in
    assignment_assignees.json has an identical shape.
    """
    return {
        "id": assignee_id,
        "assignment_id": assignment_id,
        "student_id": student_id,
        "classroom_id": CLASSROOM["id"],
        "status": "NOT_STARTED",
        "started_at": None,
        "submitted_at": None,
        "total_marks": None,
        "overall_score": None,
        "ai_feedback": None,
        "next_step_outcome": None,
        "is_active": True,
        "deactivated_at": None,
        # Assignee rows are created 7 days before the due offset.
        "created_at": ms(days_ago(-(due_offset - 7))),
    }


for week_idx, assignment_def in enumerate(ASSIGNMENT_DEFS_SORTED):
    aid, name, topic, offset, status, qb_ids = assignment_def
    a_questions = [aq for aq in ASSIGNMENT_QUESTIONS if aq["assignment_id"] == aid]
    is_open = status in ("IN_PROGRESS", "PUBLISHED")

    for student in STUDENTS:
        sid = student["id"]

        # Low-attempt students never start anything that is not CLOSED.
        # DRAFT assignments get no assignee rows at all, so only record a
        # NOT_STARTED row for the non-draft ones.
        if status != "CLOSED" and (
            sid in SKIP_RECENT_ASSIGNMENTS_STUDENTS or sid in LOW_ATTEMPT_RATE_STUDENTS
        ):
            if status != "DRAFT":
                ASSIGNMENT_ASSIGNEES.append(
                    _not_started_assignee(assignee_id_seq, aid, sid, offset)
                )
                assignee_id_seq += 1
            continue

        # Open assignments: ~85% of students have started (never submitted);
        # the rest get a NOT_STARTED row. Every other status counts as
        # started and submitted.
        if is_open:
            submitted = False
            started = RNG.random() < 0.85
            if not started:
                ASSIGNMENT_ASSIGNEES.append(
                    _not_started_assignee(assignee_id_seq, aid, sid, offset)
                )
                assignee_id_seq += 1
                continue
        else:
            submitted = True

        # Date the attempt two days after the assignee row's created_at
        # (offset - 5 vs. offset - 7), clamped to at most 42 days back,
        # at a random time between 16:00 and 19:59.
        # NOTE(review): when offset >= 5 this falls on TODAY's date or later
        # at 16:00+, i.e. after the 09:00 reference time — confirm that
        # ASSIGNMENT_DEFS never yields such offsets for attempted assignments.
        attempt_day_offset = max(offset - 5, -42)
        start_hour = 16 + RNG.randint(0, 3)
        start_minute = RNG.randint(0, 59)
        started_at = days_ago(-attempt_day_offset, hour=start_hour, minute=start_minute)

        # For DRAFT, no assignees needed (skip). The check deliberately sits
        # after the two RNG draws above: moving it earlier would shift the
        # seeded random stream and change every record generated afterwards.
        if status == "DRAFT":
            continue

        assignee_id = assignee_id_seq
        assignee_id_seq += 1

        # Generate answers. Unsubmitted (open) attempts cover only a prefix
        # of the questions: between half (at least one) and all of them.
        questions_to_answer = a_questions
        if is_open and not submitted:
            n_done = RNG.randint(max(1, len(a_questions) // 2), len(a_questions))
            questions_to_answer = a_questions[:n_done]

        total_score = 0
        running_time_offset = 0
        answer_reviews = []
        for aq in questions_to_answer:
            q = QB_BY_ID[aq["question_bank_id"]]
            ans = gen_answer(student, assignment_def, aq, q, week_idx, weeks_total)
            working_steps = derive_working_steps(q, ans)
            review = derive_review_fields(q, ans)

            time_on_task = ans["time_on_task_seconds"]
            mark = 1 if ans["is_correct"] else 0

            # Each answer lands time_on_task seconds after the previous one
            # finished, followed by a 5-30 second gap before the next starts.
            answered_at = started_at + timedelta(seconds=running_time_offset + time_on_task)
            running_time_offset += time_on_task + RNG.randint(5, 30)

            STUDENT_ANSWERS.append({
                "id": answer_id_seq,
                "assignee_id": assignee_id,
                "assignment_question_id": aq["id"],
                "answer_type": "LATEX",
                "answer_latex": ans["answer_text"],
                "extracted_answer": ans["answer_text"],
                "solve_mode": ans["solve_mode"],
                "working_steps": working_steps,
                "graded_marks": mark,
                "marks_awarded": mark,
                "ai_reasoning": (
                    "Answer matches expected solution."
                    if ans["is_correct"]
                    else f"Incorrect; expected {q['correct_answer']}."
                ),
                "is_correct": ans["is_correct"],
                "ai_feedback": review["ai_feedback_review"],
                "review_needs_attention": review["review_needs_attention"],
                "review_issue_reason": review["review_issue_reason"],
                "review_correctness_score": review["review_correctness_score"],
                "review_understanding_score": review["review_understanding_score"],
                "review_question_score": review["review_question_score"],
                "review_confidence": review["review_confidence"],
                "review_tags": review["review_tags"],
                "grading_status": "GRADED",
                "grading_attempts": 1,
                "is_active": True,
                "created_at": ms(answered_at),
                # ---- Hackathon annotations (not in production schema) ----
                "_solve_mode": ans["solve_mode"],
                "_time_on_task_seconds": time_on_task,
                "_is_correct": ans["is_correct"],
                "_misconception_tag": ans["misconception_tag"],
                "_question_topic": q["topic"],
                "_question_sub_topic": q["sub_topic"],
                "_question_difficulty": q["difficulty"],
                "_answered_at": ms(answered_at),
            })
            answer_reviews.append({
                "topic": q["topic"],
                "is_correct": ans["is_correct"],
                "review_understanding_score": review["review_understanding_score"],
                "review_needs_attention": review["review_needs_attention"],
            })
            answer_id_seq += 1
            total_score += mark

            # Activity log: one ANSWERED event per answer.
            ACTIVITY_LOGS.append({
                "id": log_id_seq,
                "assignee_id": assignee_id,
                "assignment_question_id": aq["id"],
                "activity_type": "ANSWERED",
                "timestamp": ms(answered_at),
                "duration_seconds": time_on_task,
                "extra_data": {"solve_mode": ans["solve_mode"]},
                "created_at": ms(answered_at),
                "_student_id": sid,
            })
            log_id_seq += 1

        submitted_at = started_at + timedelta(seconds=running_time_offset) if submitted else None

        # The summary is always computed (keeps the call sequence identical
        # for every started attempt) but only published on submitted rows.
        overall_score, assignee_ai_feedback, next_step_outcome = build_assignment_review_summary(
            student["fullname"],
            name,
            answer_reviews,
        )
        ASSIGNMENT_ASSIGNEES.append({
            "id": assignee_id,
            "assignment_id": aid,
            "student_id": sid,
            "classroom_id": CLASSROOM["id"],
            "status": "SUBMITTED" if submitted else "IN_PROGRESS",
            "started_at": ms(started_at),
            "submitted_at": ms(submitted_at) if submitted_at else None,
            "total_marks": total_score if submitted else None,
            "overall_score": overall_score if submitted else None,
            "ai_feedback": assignee_ai_feedback if submitted else None,
            "next_step_outcome": next_step_outcome if submitted else None,
            "is_active": True,
            "deactivated_at": None,
            "created_at": ms(days_ago(-(offset - 7))),
        })


# ---------------------------------------------------------------------------
# Write outputs
# ---------------------------------------------------------------------------
def write_json(name: str, data) -> None:
    """Serialise *data* as indented JSON to OUT_DIR / name and report it.

    The file is opened explicitly as UTF-8 so the result does not depend on
    the platform's locale encoding. Values json cannot encode natively are
    stringified via ``default=str``.
    """
    path = OUT_DIR / name
    with path.open("w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, default=str)
    print(f" wrote {name} ({len(data) if isinstance(data, list) else 'object'} records)")


print("Generating mock dataset...")
write_json("classroom.json", {
    "classroom": CLASSROOM,
    "tutor": TUTOR,
    "classroom_student_rs": CLASSROOM_STUDENT_RS,
})
write_json("students.json", STUDENTS)
write_json("question_bank.json", QUESTION_BANK)
write_json("assignments.json", ASSIGNMENTS)
write_json("assignment_questions.json", ASSIGNMENT_QUESTIONS)
write_json("assignment_assignees.json", ASSIGNMENT_ASSIGNEES)
write_json("student_answers.json", STUDENT_ANSWERS)
write_json("activity_logs.json", ACTIVITY_LOGS)

# Single-file bundle of everything above plus generation metadata.
# generated_at_utc is the fixed reference date rather than the wall clock,
# which keeps re-runs byte-identical.
dataset = {
    "_meta": {
        "generated_at_utc": TODAY.isoformat(),
        "reference_today": TODAY.date().isoformat(),
        "schema_source": "elevenplus-backend/src/app/models/",
        "subject": "Maths (UK 11+)",
        "students": len(STUDENTS),
        "assignments": len(ASSIGNMENTS),
        "questions_in_bank": len(QUESTION_BANK),
        "student_answers": len(STUDENT_ANSWERS),
        "expected_top_3_at_risk_student_ids": [201, 203, 204],
    },
    "classroom": CLASSROOM,
    "tutor": TUTOR,
    "classroom_student_rs": CLASSROOM_STUDENT_RS,
    "students": STUDENTS,
    "question_bank": QUESTION_BANK,
    "assignments": ASSIGNMENTS,
    "assignment_questions": ASSIGNMENT_QUESTIONS,
    "assignment_assignees": ASSIGNMENT_ASSIGNEES,
    "student_answers": STUDENT_ANSWERS,
    "activity_logs": ACTIVITY_LOGS,
}
write_json("dataset.json", dataset)
print("Done.")