from __future__ import annotations

import argparse
import hashlib
import json
import webbrowser
from dataclasses import dataclass
from pathlib import Path

import matplotlib

matplotlib.use("Agg")

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


# Faculty name -> list of programmes offered under that faculty.
# The key order matters: generate_sources() pairs the keys positionally with a
# hard-coded probability vector when sampling a faculty for each student.
FACULTY_PROGRAMMES = {
    "Medicine": ["Medicine", "Nursing", "Public Health"],
    "Engineering": ["Computer Science", "Mechanical Engineering", "Cybernetics"],
    "Humanities": ["History", "Linguistics", "Philosophy"],
    "Social Sciences": ["Economics", "Political Science", "Psychology"],
    "Business": ["Accounting", "Finance", "Management"],
    "Education": ["Primary Education", "Special Needs Education", "Pedagogy"],
    "Law": ["Law", "International Law", "Public Administration"],
    "Natural Sciences": ["Biology", "Chemistry", "Mathematics"],
}

# Term calendar keyed by term id.
#   start / end           - first and last day of the daily activity series.
#   label_observed_after  - timestamp copied onto enrollment rows and used as
#                           `change_recorded_at` for withdrawal status rows,
#                           i.e. when the next-term outcome becomes known.
# Iteration order is relied upon: "2025A" is processed before "2026S".
TERMS = {
    "2025A": {
        "start": pd.Timestamp("2025-08-18"),
        "end": pd.Timestamp("2025-12-07"),
        "label_observed_after": pd.Timestamp("2026-01-15"),
    },
    "2026S": {
        "start": pd.Timestamp("2026-01-12"),
        "end": pd.Timestamp("2026-05-03"),
        "label_observed_after": pd.Timestamp("2026-06-15"),
    },
}

# Version tag for the demo model. Its use is not visible in this section;
# presumably it is stamped on prediction rows (`model_version`) - TODO confirm.
MODEL_VERSION = "student-risk-demo-0.1"


@dataclass
class SourceData:
    """Bundle of the synthetic source-system tables produced by generate_sources()."""

    # One row per student: demographics, faculty/programme, advisor.
    sis_students: pd.DataFrame
    # Maps each source-system person id (SIS/LMS/ERP/CAMPUS) to canonical_student_id.
    identity_map: pd.DataFrame
    # Status/programme history rows with valid_from/valid_to and change_recorded_at.
    status_history: pd.DataFrame
    # One row per student and term, including prior GPA and the next-term labels.
    enrollment_terms: pd.DataFrame
    # One row per LMS user and day of term.
    lms_activity_daily: pd.DataFrame
    # Weekly finance snapshots per ERP account.
    erp_finance_snapshots: pd.DataFrame
    # One row per badge and day of term.
    campus_activity_daily: pd.DataFrame


def sigmoid(values: np.ndarray) -> np.ndarray:
    """Return the element-wise logistic function of *values*.

    Inputs are clipped to [-50, 50] before exponentiation so that ``np.exp``
    cannot overflow for extreme logits.
    """
    bounded = np.clip(values, -50, 50)
    denominator = 1.0 + np.exp(-bounded)
    return 1.0 / denominator


def term_dates(term_id: str) -> pd.DatetimeIndex:
    """Return every calendar day of the given term, start and end inclusive.

    Raises ``KeyError`` if *term_id* is not a key of ``TERMS``.
    """
    window = TERMS[term_id]
    first_day, last_day = window["start"], window["end"]
    return pd.date_range(start=first_day, end=last_day, freq="D")


def choose_programmes(faculties: np.ndarray, rng: np.random.Generator) -> list[str]:
    """Draw one programme per entry of *faculties* from that faculty's offering.

    One ``rng.choice`` call is made per faculty, in input order, so the result
    is reproducible for a given generator state.
    """
    return [rng.choice(FACULTY_PROGRAMMES[faculty_name]) for faculty_name in faculties]


def generate_sources(n_students: int, seed: int) -> SourceData:
    """Generate the synthetic source-system tables for *n_students* students.

    All randomness comes from a single ``np.random.default_rng(seed)`` generator,
    so the output is reproducible for a given (n_students, seed) pair. The order
    of the random draws below therefore determines the generated data; reordering
    statements changes the output.

    Returns a ``SourceData`` holding the student master table, the identity map,
    status history, per-term enrollment rows (with labels) and the daily/weekly
    activity tables filled in by ``generate_term_events``.
    """
    rng = np.random.default_rng(seed)
    faculties = np.array(list(FACULTY_PROGRAMMES.keys()))

    canonical_ids = np.array([f"STU-{100000 + i}" for i in range(n_students)])
    # The probability vector is positional: one weight per FACULTY_PROGRAMMES key, in dict order.
    faculty = rng.choice(faculties, size=n_students, p=[0.11, 0.15, 0.1, 0.15, 0.13, 0.12, 0.09, 0.15])
    programme = np.array(choose_programmes(faculty, rng))
    advisor_id = np.array([f"ADV-{rng.integers(100, 175)}" for _ in range(n_students)])
    study_mode = rng.choice(["campus", "hybrid", "online-heavy"], size=n_students, p=[0.5, 0.35, 0.15])
    gender = rng.choice(["female", "male", "non-binary_or_not_shared"], size=n_students, p=[0.53, 0.45, 0.02])
    age_band = rng.choice(["under_23", "23_29", "30_plus"], size=n_students, p=[0.62, 0.27, 0.11])
    international = rng.random(n_students) < 0.17
    first_generation = rng.random(n_students) < 0.24
    # Commute is clipped to the range 2..120 before rounding to whole minutes.
    commute_minutes = np.clip(rng.normal(28, 18, n_students), 2, 120).round().astype(int)

    # Latent per-student traits that drive GPA and the dropout simulation below.
    ability = rng.normal(0, 1, n_students)
    # Base stress probability 0.18, raised for first-generation and international students.
    financial_stress = rng.random(n_students) < (0.18 + 0.08 * first_generation + 0.04 * international)
    base_gpa = np.clip(3.05 + 0.38 * ability - 0.16 * financial_stress + rng.normal(0, 0.28, n_students), 1.0, 4.0)

    sis_students = pd.DataFrame(
        {
            "canonical_student_id": canonical_ids,
            "sis_student_id": [f"FS-{200000 + i}" for i in range(n_students)],
            # NOTE: birth_year is drawn independently of age_band, so the two columns are not guaranteed to agree.
            "birth_year": 2026 - rng.choice([20, 21, 22, 23, 24, 27, 32], size=n_students, p=[0.18, 0.2, 0.19, 0.15, 0.1, 0.11, 0.07]),
            "gender": gender,
            "age_band": age_band,
            "international_student": international,
            "first_generation_student": first_generation,
            "home_commute_minutes": commute_minutes,
            "preferred_study_mode": study_mode,
            "faculty": faculty,
            "programme": programme,
            "advisor_id": advisor_id,
            "privacy_notice_version": "2026-01",
        }
    )

    # Per-system identifiers, aligned by position with the rows of sis_students.
    lms_ids = [f"CANVAS-{300000 + i}" for i in range(n_students)]
    erp_ids = [f"UNIT4-{400000 + i}" for i in range(n_students)]
    badge_ids = [f"BADGE-{500000 + i}" for i in range(n_students)]
    source_rows = []
    # sis_students has the default integer index, so row.name is the student's position 0..n-1.
    for _, row in sis_students.iterrows():
        # Four identity rows per student, one per source system.
        source_rows.extend(
            [
                (row.canonical_student_id, "SIS", row.sis_student_id, "2024-08-01", None, 1.0, "system_of_record"),
                (row.canonical_student_id, "LMS", lms_ids[row.name], "2024-08-01", None, 0.995, "verified_sso_match"),
                (row.canonical_student_id, "ERP", erp_ids[row.name], "2024-08-01", None, 0.99, "national_id_hash_match"),
                (row.canonical_student_id, "CAMPUS", badge_ids[row.name], "2024-08-01", None, 0.985, "card_registry_match"),
            ]
        )
    identity_map = pd.DataFrame(
        source_rows,
        columns=[
            "canonical_student_id",
            "source_system",
            "source_person_id",
            "valid_from",
            "valid_to",
            "match_confidence",
            "match_method",
        ],
    )
    identity_map["valid_from"] = pd.to_datetime(identity_map["valid_from"])
    # valid_to is None for every row, so this column becomes all-NaT.
    identity_map["valid_to"] = pd.to_datetime(identity_map["valid_to"])

    # Row accumulators; the last three are appended to in place by generate_term_events().
    enrollment_rows = []
    status_rows = []
    lms_rows = []
    erp_rows = []
    campus_rows = []

    for i, student in sis_students.iterrows():
        current_programme = student.programme
        # About 4.5% of students switch to another programme of the same faculty for the 2026 term.
        switched_programme = rng.random() < 0.045
        programme_2026 = current_programme
        if switched_programme:
            choices = [p for p in FACULTY_PROGRAMMES[student.faculty] if p != current_programme]
            programme_2026 = rng.choice(choices) if choices else current_programme

        # Initial "active" status row; it is closed on 2026-01-12 when a programme switch follows.
        status_rows.append(
            {
                "sis_student_id": student.sis_student_id,
                "status": "active",
                "faculty": student.faculty,
                "programme": current_programme,
                "valid_from": pd.Timestamp("2024-08-01"),
                "valid_to": pd.Timestamp("2026-01-12") if switched_programme else pd.NaT,
                "change_recorded_at": pd.Timestamp("2024-08-01"),
                "source_sequence": 1,
            }
        )
        if switched_programme:
            # The switch takes effect on 2026-01-12 but is recorded earlier, on 2025-12-18.
            status_rows.append(
                {
                    "sis_student_id": student.sis_student_id,
                    "status": "active",
                    "faculty": student.faculty,
                    "programme": programme_2026,
                    "valid_from": pd.Timestamp("2026-01-12"),
                    "valid_to": pd.NaT,
                    "change_recorded_at": pd.Timestamp("2025-12-18"),
                    "source_sequence": 2,
                }
            )

        continues_to_next_term = True
        for term_id in TERMS:
            # Students labelled as dropouts after 2025A get no 2026S enrollment or activity.
            if term_id == "2026S" and not continues_to_next_term:
                continue
            term = TERMS[term_id]
            term_index = 0 if term_id == "2025A" else 1
            # Term-level GPA: base GPA plus noise, lowered by 0.07 for the second term.
            prior_gpa = np.clip(base_gpa[i] + rng.normal(0, 0.12) - 0.07 * term_index, 1.0, 4.0)
            credits_registered = int(rng.choice([20, 25, 30, 35], p=[0.06, 0.12, 0.76, 0.06]))
            # Standing thresholds: below 2.15 -> probation, below 2.45 -> watch, otherwise normal.
            academic_standing = "normal"
            if prior_gpa < 2.15:
                academic_standing = "probation"
            elif prior_gpa < 2.45:
                academic_standing = "watch"

            latent_disengagement = rng.normal(0, 1) + 0.55 * (prior_gpa < 2.3) + 0.45 * financial_stress[i]
            # Logistic dropout model over GPA shortfall, financial stress, disengagement and probation.
            dropout_probability = sigmoid(
                np.array(
                    [
                        -3.25
                        + 0.9 * max(0, 2.6 - prior_gpa)
                        + 0.75 * financial_stress[i]
                        + 0.85 * latent_disengagement
                        + 0.35 * (academic_standing == "probation")
                    ]
                )
            )[0]
            # Approved leave is separate from dropout and should not be treated as the same target.
            approved_leave_probability = 0.025 + 0.015 * (student.age_band == "30_plus")
            approved_leave_next_term = rng.random() < approved_leave_probability
            # The dropout draw is always consumed; approved leave then overrides a positive dropout label.
            dropout_next_term = (rng.random() < dropout_probability) and not approved_leave_next_term

            enrollment_rows.append(
                {
                    "sis_student_id": student.sis_student_id,
                    "canonical_student_id": student.canonical_student_id,
                    "term_id": term_id,
                    "term_start_date": term["start"],
                    "faculty": student.faculty,
                    "programme": current_programme if term_id == "2025A" else programme_2026,
                    "credits_registered": credits_registered,
                    "credits_completed_previous_term": int(np.clip(rng.normal(23 + 5 * (prior_gpa - 2.5), 6), 0, 35)),
                    "prior_gpa": round(float(prior_gpa), 2),
                    "academic_standing": academic_standing,
                    "approved_leave_next_term": bool(approved_leave_next_term),
                    "dropout_label_next_term": bool(dropout_next_term),
                    "label_observed_after": term["label_observed_after"],
                    # The enrollment row becomes available 14 days before the term starts.
                    "available_at": term["start"] - pd.Timedelta(days=14),
                    # Simulation internals stored alongside the observable columns
                    # (presumably for demo diagnostics, not as model inputs - TODO confirm).
                    "financial_stress_seed": bool(financial_stress[i]),
                    "latent_disengagement_seed": float(latent_disengagement),
                }
            )

            if dropout_next_term:
                # Withdrawal takes effect 18 days after term end and is recorded at label_observed_after.
                status_rows.append(
                    {
                        "sis_student_id": student.sis_student_id,
                        "status": "withdrawn",
                        "faculty": student.faculty,
                        "programme": current_programme if term_id == "2025A" else programme_2026,
                        "valid_from": term["end"] + pd.Timedelta(days=18),
                        "valid_to": pd.NaT,
                        "change_recorded_at": term["label_observed_after"],
                        "source_sequence": 99,
                    }
                )
                if term_id == "2025A":
                    continues_to_next_term = False

            # Appends this student's daily LMS/campus rows and weekly ERP rows for the term.
            generate_term_events(
                rng=rng,
                student=student,
                term_id=term_id,
                prior_gpa=prior_gpa,
                financial_stress=bool(financial_stress[i]),
                latent_disengagement=float(latent_disengagement),
                dropout_next_term=bool(dropout_next_term),
                lms_user_id=lms_ids[i],
                erp_account_id=erp_ids[i],
                badge_id=badge_ids[i],
                lms_rows=lms_rows,
                erp_rows=erp_rows,
                campus_rows=campus_rows,
            )

    return SourceData(
        sis_students=sis_students,
        identity_map=identity_map,
        status_history=pd.DataFrame(status_rows),
        enrollment_terms=pd.DataFrame(enrollment_rows),
        lms_activity_daily=pd.DataFrame(lms_rows),
        erp_finance_snapshots=pd.DataFrame(erp_rows),
        campus_activity_daily=pd.DataFrame(campus_rows),
    )


def generate_term_events(
    rng: np.random.Generator,
    student: pd.Series,
    term_id: str,
    prior_gpa: float,
    financial_stress: bool,
    latent_disengagement: float,
    dropout_next_term: bool,
    lms_user_id: str,
    erp_account_id: str,
    badge_id: str,
    lms_rows: list[dict],
    erp_rows: list[dict],
    campus_rows: list[dict],
) -> None:
    """Simulate one student's activity for one term and append it to the row lists.

    Appends, in place:
      * one LMS row and one campus row per calendar day of the term, and
      * one ERP finance snapshot every 7 days, starting 7 days before term start.

    Nothing is returned; ``lms_rows``, ``erp_rows`` and ``campus_rows`` are mutated.
    The number and order of ``rng`` draws determine the output, so statement
    order here must not be changed.
    """
    term = TERMS[term_id]
    dates = term_dates(term_id)
    mode = student.preferred_study_mode
    # Baseline daily intensity per study mode; raises KeyError for an unknown mode.
    lms_base = {"campus": 1.2, "hybrid": 1.55, "online-heavy": 2.05}[mode]
    campus_base = {"campus": 3.1, "hybrid": 2.1, "online-heavy": 0.65}[mode]
    # Commutes longer than 55 minutes reduce campus presence.
    if student.home_commute_minutes > 55:
        campus_base *= 0.76
    if bool(student.international_student):
        lms_base *= 1.05
    # Scaled GPA advantage over 2.3, bounded to 0..0.7.
    ability_boost = np.clip((prior_gpa - 2.3) / 1.7, 0, 0.7)

    for day in dates:
        # 1-based week of term.
        week = int((day - term["start"]).days // 7) + 1
        is_weekend = day.weekday() >= 5
        decline = 1.0
        # Future dropouts fade from week 4: 0.105 lower per week, further lowered by
        # positive latent disengagement, with a floor of 0.16.
        if dropout_next_term and week >= 4:
            decline = max(0.16, 1.0 - 0.105 * (week - 3) - 0.05 * max(0, latent_disengagement))
        noise = rng.lognormal(mean=0, sigma=0.18)
        weekday_factor = 0.42 if is_weekend else 1.0

        login_lambda = max(0.03, lms_base * decline * weekday_factor * noise + 0.1 * ability_boost)
        login_count = int(rng.poisson(login_lambda))
        course_views = int(rng.poisson(max(0.1, login_count * rng.uniform(2.0, 5.5))))
        # No gamma draw is made on days without a login.
        video_minutes = int(rng.gamma(shape=1.5, scale=18) * min(1.8, login_count / 2.2)) if login_count else 0
        forum_posts = int(rng.poisson(0.05 + 0.12 * login_count + (0.04 if mode == "online-heavy" else 0)))
        # Quiz attempts only occur through week 12; afterwards no draw is made.
        quiz_attempts = int(rng.poisson(0.1 + 0.18 * login_count)) if week <= 12 else 0

        # One assignment is due each Wednesday (weekday() == 2) in weeks 2..12.
        assignment_due = int(day.weekday() == 2 and 2 <= week <= 12)
        submission_probability = 0.88 + 0.06 * ability_boost - 0.22 * dropout_next_term * (week >= 4) - 0.08 * financial_stress
        # Short-circuit: the random draws below only happen when an assignment is due / was submitted.
        assignment_submitted = int(assignment_due and rng.random() < np.clip(submission_probability, 0.1, 0.98))
        assignment_late = int(assignment_submitted and rng.random() < (0.09 + 0.18 * dropout_next_term * (week >= 4)))

        # LMS rows become available 2..37 hours after the activity date (upper bound is exclusive).
        available_delay_hours = int(rng.integers(2, 38))
        lms_rows.append(
            {
                "lms_user_id": lms_user_id,
                "term_id": term_id,
                "activity_date": day,
                "login_count": login_count,
                "course_views": course_views,
                "assignment_due_count": assignment_due,
                "assignment_submitted_count": assignment_submitted,
                "late_submission_count": assignment_late,
                "forum_post_count": forum_posts,
                "video_minutes": video_minutes,
                "quiz_attempt_count": quiz_attempts,
                "available_at": day + pd.Timedelta(hours=available_delay_hours),
            }
        )

        # Always equals `decline`, since decline is 1.0 unless the same condition holds.
        campus_decline = decline if dropout_next_term and week >= 4 else 1.0
        campus_lambda = max(0.01, campus_base * campus_decline * weekday_factor * rng.lognormal(0, 0.2))
        building_entries = int(rng.poisson(campus_lambda))
        wifi_minutes = int(rng.gamma(2.0, 60) * min(1.0, building_entries / 3.0)) if building_entries else 0
        library_entries = int(rng.poisson(0.14 * building_entries + 0.03 * (week <= 12)))
        campus_rows.append(
            {
                "badge_id": badge_id,
                "term_id": term_id,
                "event_date": day,
                "building_entry_count": building_entries,
                "library_entry_count": library_entries,
                "wifi_minutes": wifi_minutes,
                # Zone count never exceeds the number of building entries.
                "campus_zone_count": int(min(building_entries, rng.integers(1, 6))) if building_entries else 0,
                # Campus rows become available 4..47 hours after the event date.
                "available_at": day + pd.Timedelta(hours=int(rng.integers(4, 48))),
            }
        )

    # Weekly finance snapshots, the first one taken 7 days before the term starts.
    for snap_date in pd.date_range(term["start"] - pd.Timedelta(days=7), term["end"], freq="7D"):
        # The pre-term snapshot maps to week 0; week 1 begins at term start.
        week = max(0, int((snap_date - term["start"]).days // 7) + 1)
        overdue_days = 0
        balance = 0.0
        tuition_paid = True
        if financial_stress:
            # Re-drawn independently for every snapshot from week 9 on, so the paid flag
            # can flip between consecutive snapshots; before week 9 it is always False.
            tuition_paid = week >= 9 and rng.random() < 0.35
            if not tuition_paid and week >= 3:
                overdue_days = int((week - 2) * 7 + rng.integers(0, 5))
                balance = float(np.clip(rng.normal(8500, 2500), 1600, 18000))
        # Scholarship amount and aid status are also drawn afresh for each snapshot.
        scholarship_amount = float(rng.choice([0, 5000, 10000, 15000], p=[0.72, 0.12, 0.1, 0.06]))
        erp_rows.append(
            {
                "erp_account_id": erp_account_id,
                "term_id": term_id,
                "status_date": snap_date,
                "tuition_paid": bool(tuition_paid),
                "outstanding_balance_nok": round(balance, 2),
                "payment_overdue_days": overdue_days,
                "scholarship_amount_nok": scholarship_amount,
                "payment_plan": bool(financial_stress and rng.random() < 0.45),
                "aid_status": rng.choice(["none", "applied", "approved", "delayed"], p=[0.5, 0.16, 0.27, 0.07]),
                # ERP snapshots become available 8..71 hours after the status date.
                "available_at": snap_date + pd.Timedelta(hours=int(rng.integers(8, 72))),
            }
        )


def map_identity(df: pd.DataFrame, identity_map: pd.DataFrame, source_system: str, id_col: str) -> pd.DataFrame:
    """Attach ``canonical_student_id`` to *df* via the identity map.

    Only identity rows for *source_system* are used. The join is a left join on
    ``df[id_col] == source_person_id``, so rows of *df* without a mapping are
    kept with a missing canonical id. The helper ``source_person_id`` column is
    dropped from the result.
    """
    belongs_to_system = identity_map["source_system"].eq(source_system)
    lookup = identity_map.loc[belongs_to_system, ["canonical_student_id", "source_person_id"]]
    joined = df.merge(lookup, left_on=id_col, right_on="source_person_id", how="left")
    return joined.drop(columns=["source_person_id"])


def build_feature_snapshot(sources: SourceData, term_id: str, as_of_date: pd.Timestamp) -> pd.DataFrame:
    """Build one feature row per enrolled student for *term_id* as of *as_of_date*.

    Every source table is restricted to rows whose event/status date and
    ``available_at`` timestamp are on or before *as_of_date*, so the snapshot
    only uses data that had been recorded at that point in time.

    Args:
        sources: The source tables to read from (not modified).
        term_id: Key of ``TERMS`` selecting the term to build features for.
        as_of_date: Point-in-time cut-off for all inputs.

    Returns:
        The term's enrollment rows joined with student attributes, status as of
        the cut-off, LMS / campus / finance aggregates and derived features.
        All enrollment columns are carried through unchanged, including the
        label and ``*_seed`` columns; callers choose which columns are used as
        model inputs.
    """
    term_start = TERMS[term_id]["start"]
    students = sources.sis_students.copy()
    enroll = sources.enrollment_terms.loc[sources.enrollment_terms["term_id"].eq(term_id)].copy()
    enroll = enroll.loc[enroll["available_at"].le(as_of_date)]

    # Status as of the cut-off: the row must already be recorded and be valid on as_of_date.
    status = sources.status_history.copy()
    # Open-ended rows (valid_to is NaT) are treated as valid far into the future.
    status["valid_to_filled"] = status["valid_to"].fillna(pd.Timestamp("2100-01-01"))
    status_at = status.loc[
        status["change_recorded_at"].le(as_of_date)
        & status["valid_from"].le(as_of_date)
        & status["valid_to_filled"].gt(as_of_date)
    ].sort_values(["sis_student_id", "source_sequence"])
    # When several rows qualify, the one with the highest source_sequence wins.
    status_at = status_at.drop_duplicates("sis_student_id", keep="last")
    status_at = status_at[["sis_student_id", "status", "faculty", "programme"]].rename(
        columns={"status": "student_status_asof", "faculty": "faculty_asof", "programme": "programme_asof"}
    )

    # LMS: 14-day engagement window plus term-to-date assessment counts.
    lms = map_identity(sources.lms_activity_daily, sources.identity_map, "LMS", "lms_user_id")
    lms = lms.loc[lms["term_id"].eq(term_id) & lms["activity_date"].le(as_of_date) & lms["available_at"].le(as_of_date)]
    lms_14 = lms.loc[lms["activity_date"].gt(as_of_date - pd.Timedelta(days=14))]
    lms_to_date = lms.copy()
    last_login = (
        lms.loc[lms["login_count"].gt(0)]
        .groupby("canonical_student_id")["activity_date"]
        .max()
        .rename("last_lms_login_date")
        .reset_index()
    )
    lms_features = lms_14.groupby("canonical_student_id").agg(
        lms_logins_14d=("login_count", "sum"),
        lms_course_views_14d=("course_views", "sum"),
        video_minutes_14d=("video_minutes", "sum"),
        forum_posts_14d=("forum_post_count", "sum"),
        quiz_attempts_14d=("quiz_attempt_count", "sum"),
    )
    assessment_features = lms_to_date.groupby("canonical_student_id").agg(
        assignments_due_to_date=("assignment_due_count", "sum"),
        assignments_submitted_to_date=("assignment_submitted_count", "sum"),
        late_submissions_to_date=("late_submission_count", "sum"),
    )
    lms_features = lms_features.join(assessment_features, how="outer").reset_index()
    lms_features = lms_features.merge(last_login, on="canonical_student_id", how="left")

    # Campus: 14-day and 30-day windows plus the first 14 days of term as a baseline.
    campus = map_identity(sources.campus_activity_daily, sources.identity_map, "CAMPUS", "badge_id")
    # Explicit copy: a column is assigned on the next line, and assigning onto a
    # .loc-filtered frame triggers pandas' SettingWithCopyWarning.
    campus = campus.loc[
        campus["term_id"].eq(term_id) & campus["event_date"].le(as_of_date) & campus["available_at"].le(as_of_date)
    ].copy()
    campus["active_campus_day"] = (campus["building_entry_count"].gt(0) | campus["wifi_minutes"].gt(0)).astype(int)
    campus_14 = campus.loc[campus["event_date"].gt(as_of_date - pd.Timedelta(days=14))]
    campus_30 = campus.loc[campus["event_date"].gt(as_of_date - pd.Timedelta(days=30))]
    campus_start = campus.loc[campus["event_date"].between(term_start, term_start + pd.Timedelta(days=13))]
    campus_features = campus_14.groupby("canonical_student_id").agg(
        campus_days_14d=("active_campus_day", "sum"),
        building_entries_14d=("building_entry_count", "sum"),
        campus_wifi_minutes_14d=("wifi_minutes", "sum"),
    )
    campus_features = campus_features.join(
        campus_30.groupby("canonical_student_id").agg(library_entries_30d=("library_entry_count", "sum")),
        how="outer",
    )
    campus_features = campus_features.join(
        campus_start.groupby("canonical_student_id").agg(campus_days_first_14d=("active_campus_day", "sum")),
        how="outer",
    ).reset_index()

    # Finance: the most recent snapshot per student that was available at the cut-off.
    erp = map_identity(sources.erp_finance_snapshots, sources.identity_map, "ERP", "erp_account_id")
    erp = erp.loc[erp["term_id"].eq(term_id) & erp["status_date"].le(as_of_date) & erp["available_at"].le(as_of_date)]
    erp_latest = erp.sort_values(["canonical_student_id", "status_date"]).drop_duplicates("canonical_student_id", keep="last")
    erp_features = erp_latest[
        [
            "canonical_student_id",
            "tuition_paid",
            "outstanding_balance_nok",
            "payment_overdue_days",
            "scholarship_amount_nok",
            "payment_plan",
            "aid_status",
        ]
    ]

    # Enrollment is the spine; every other table is left-joined onto it.
    features = (
        enroll.merge(students, on="canonical_student_id", how="left", suffixes=("", "_student"))
        .merge(status_at, on="sis_student_id", how="left")
        .merge(lms_features, on="canonical_student_id", how="left")
        .merge(campus_features, on="canonical_student_id", how="left")
        .merge(erp_features, on="canonical_student_id", how="left")
    )
    # Count/amount columns: a missing value means no activity rows in the window.
    fill_zero = [
        "lms_logins_14d",
        "lms_course_views_14d",
        "video_minutes_14d",
        "forum_posts_14d",
        "quiz_attempts_14d",
        "assignments_due_to_date",
        "assignments_submitted_to_date",
        "late_submissions_to_date",
        "campus_days_14d",
        "building_entries_14d",
        "campus_wifi_minutes_14d",
        "library_entries_30d",
        "campus_days_first_14d",
        "outstanding_balance_nok",
        "payment_overdue_days",
        "scholarship_amount_nok",
    ]
    for col in fill_zero:
        if col in features:
            features[col] = features[col].fillna(0)

    features["as_of_date"] = as_of_date
    # 1-based week of term at the cut-off.
    features["term_week"] = int((as_of_date - term_start).days // 7 + 1)
    features["student_status_asof"] = features["student_status_asof"].fillna("unknown")
    # Fall back to the enrollment row's faculty/programme when no status row qualifies.
    features["faculty_asof"] = features["faculty_asof"].fillna(features["faculty"])
    features["programme_asof"] = features["programme_asof"].fillna(features["programme"])
    features["tuition_paid"] = features["tuition_paid"].fillna(False).astype(bool)
    features["payment_plan"] = features["payment_plan"].fillna(False).astype(bool)
    features["aid_status"] = features["aid_status"].fillna("unknown")
    features["last_lms_login_date"] = pd.to_datetime(features["last_lms_login_date"])
    features["days_since_last_lms_login"] = (as_of_date - features["last_lms_login_date"]).dt.days
    # 999 is the sentinel for "no login observed this term".
    features["days_since_last_lms_login"] = features["days_since_last_lms_login"].fillna(999)
    features["missing_assignments_to_date"] = (
        features["assignments_due_to_date"] - features["assignments_submitted_to_date"]
    ).clip(lower=0)
    # With nothing due yet, the completion rate is defined as 1.0.
    features["submission_completion_rate_to_date"] = np.where(
        features["assignments_due_to_date"].gt(0),
        features["assignments_submitted_to_date"] / features["assignments_due_to_date"],
        1.0,
    )
    features["campus_drop_14d_vs_start"] = features["campus_days_first_14d"] - features["campus_days_14d"]
    features["outstanding_balance_log1p"] = np.log1p(features["outstanding_balance_nok"])
    features["financial_overdue_flag"] = features["payment_overdue_days"].gt(0).astype(int)
    return features


class NumpyLogisticRegression:
    """Binary logistic regression fitted by full-batch gradient descent in NumPy.

    The intercept is handled by prepending a column of ones to the inputs and is
    excluded from the L2 penalty.
    """

    def __init__(self, learning_rate: float = 0.16, epochs: int = 2200, l2: float = 0.02):
        self.learning_rate, self.epochs, self.l2 = learning_rate, epochs, l2
        # Intercept followed by one coefficient per feature; None until fit() has run.
        self.weights: np.ndarray | None = None

    @staticmethod
    def _add_intercept(x: np.ndarray) -> np.ndarray:
        """Return *x* with a leading column of ones."""
        return np.c_[np.ones(x.shape[0]), x]

    def fit(self, x: np.ndarray, y: np.ndarray) -> "NumpyLogisticRegression":
        """Fit the model on features *x* and 0/1 targets *y*; returns ``self``."""
        design = self._add_intercept(x)
        coefficients = np.zeros(design.shape[1])
        n_obs = y.size
        for _epoch in range(self.epochs):
            residual = sigmoid(design @ coefficients) - y
            step = (design.T @ residual) / n_obs
            # Ridge term applies to the feature coefficients only, not the intercept.
            step[1:] += self.l2 * coefficients[1:] / n_obs
            coefficients -= self.learning_rate * step
        self.weights = coefficients
        return self

    def predict_proba(self, x: np.ndarray) -> np.ndarray:
        """Return the predicted positive-class probability for each row of *x*.

        Raises:
            RuntimeError: If the model has not been fitted yet.
        """
        if self.weights is None:
            raise RuntimeError("Model has not been fit.")
        return sigmoid(self._add_intercept(x) @ self.weights)


def prepare_design_matrix(
    train: pd.DataFrame,
    score: pd.DataFrame,
    numeric_cols: list[str],
    categorical_cols: list[str],
) -> tuple[np.ndarray, np.ndarray, pd.DataFrame, pd.Series, pd.Series]:
    """One-hot encode, impute and standardise the train and score frames together.

    Categorical columns are expanded with ``pd.get_dummies`` (including a
    missing-value indicator), both frames are aligned to the union of their
    columns, missing values are filled with the training medians, and both are
    scaled with the training mean and standard deviation (a zero standard
    deviation is replaced by 1.0).

    Returns:
        ``(train_matrix, score_matrix, train_design, means, stds)`` where the
        matrices are standardised NumPy arrays and ``train_design`` is the
        imputed, unscaled training design frame.
    """
    selected = numeric_cols + categorical_cols

    def one_hot(frame: pd.DataFrame) -> pd.DataFrame:
        return pd.get_dummies(frame[selected], columns=categorical_cols, dummy_na=True)

    # Columns present on only one side are created as all-zero on the other.
    train_design, score_design = one_hot(train).align(one_hot(score), join="outer", axis=1, fill_value=0)
    train_design = train_design.astype(float)
    score_design = score_design.astype(float)

    # Imputation and scaling statistics come from the training frame only.
    medians = train_design.median()
    train_design = train_design.fillna(medians)
    score_design = score_design.fillna(medians)
    means = train_design.mean()
    stds = train_design.std().replace(0, 1.0)

    def standardise(design: pd.DataFrame) -> np.ndarray:
        return ((design - means) / stds).to_numpy()

    return standardise(train_design), standardise(score_design), train_design, means, stds


def transform_with_design(
    df: pd.DataFrame,
    columns: list[str],
    means: pd.Series,
    stds: pd.Series,
    numeric_cols: list[str],
    categorical_cols: list[str],
) -> np.ndarray:
    """Encode *df* onto an existing design layout and standardise it.

    The frame is one-hot encoded, reindexed to exactly *columns* (unknown dummy
    columns are dropped, absent ones are created as 0), missing values are
    filled with *means*, and the result is scaled as ``(x - means) / stds``.
    """
    raw = df[numeric_cols + categorical_cols]
    encoded = pd.get_dummies(raw, columns=categorical_cols, dummy_na=True)
    aligned = encoded.reindex(columns=columns, fill_value=0).astype(float)
    # Remaining gaps are filled with the supplied means, so they standardise to 0.
    imputed = aligned.fillna(means)
    scaled = (imputed - means) / stds
    return scaled.to_numpy()


def roc_auc_score(y_true: np.ndarray, y_score: np.ndarray) -> float:
    """Return the area under the ROC curve via the rank-sum (Mann-Whitney U) formula.

    Tied scores receive their average rank, so a positive/negative pair with
    equal scores counts as half a concordant pair and the result does not
    depend on the order of the rows.

    Args:
        y_true: Binary labels (anything castable to 0/1 integers).
        y_score: Scores, higher meaning more likely positive; same length as *y_true*.

    Returns:
        The AUC in [0, 1], or ``nan`` when only one class is present.
    """
    labels = np.asarray(y_true).astype(int).ravel()
    scores = np.asarray(y_score, dtype=float).ravel()
    n_pos = int(labels.sum())
    n_neg = int(labels.size - n_pos)
    if n_pos == 0 or n_neg == 0:
        return float("nan")

    # Mid-ranks: a group of k equal scores whose last ordinal rank is c occupies
    # ranks c-k+1..c, whose mean is c - (k - 1) / 2.
    _, group_of_row, group_sizes = np.unique(scores, return_inverse=True, return_counts=True)
    last_rank_in_group = np.cumsum(group_sizes)
    mid_rank_of_group = last_rank_in_group - (group_sizes - 1) / 2.0
    ranks = mid_rank_of_group[group_of_row]

    pos_rank_sum = ranks[labels == 1].sum()
    return float((pos_rank_sum - n_pos * (n_pos + 1) / 2) / (n_pos * n_neg))


def classification_metrics(y_true: np.ndarray, y_score: np.ndarray, top_fraction: float) -> dict:
    """Compute ranking and capacity-constrained metrics for a set of scores.

    Students whose score is at or above the ``1 - top_fraction`` quantile are
    treated as flagged; precision, recall and false-positive rate are computed
    for that flagged set. Ratios with an empty denominator evaluate to 0.
    """
    cutoff = float(np.quantile(y_score, 1 - top_fraction))
    is_flagged = y_score >= cutoff
    is_positive = y_true.astype(bool)

    def count(mask: np.ndarray) -> int:
        return int(mask.sum())

    tp = count(is_flagged & is_positive)
    fp = count(is_flagged & ~is_positive)
    fn = count(~is_flagged & is_positive)
    tn = count(~is_flagged & ~is_positive)

    metrics = {
        "roc_auc": roc_auc_score(y_true, y_score),
        "threshold_for_top_fraction": cutoff,
        "flagged_fraction": float(is_flagged.mean()),
    }
    # max(1, ...) guards against division by zero when a confusion-matrix margin is empty.
    metrics["precision_at_capacity"] = tp / max(1, tp + fp)
    metrics["recall_at_capacity"] = tp / max(1, tp + fn)
    metrics["false_positive_rate_at_capacity"] = fp / max(1, fp + tn)
    return metrics


def assign_bands(scores: pd.Series) -> pd.Series:
    """Label each score "red", "amber" or "green" by its position in the distribution.

    Scores at or above the 94th percentile are red, those at or above the 80th
    percentile are amber, and everything else is green. The result keeps the
    index of *scores*.
    """
    red_cut = scores.quantile(0.94)
    amber_cut = scores.quantile(0.80)
    # np.select takes the first matching condition, so red wins over amber.
    conditions = [(scores >= red_cut).to_numpy(), (scores >= amber_cut).to_numpy()]
    labels = np.select(conditions, ["red", "amber"], default="green")
    return pd.Series(labels, index=scores.index)


def reason_codes(row: pd.Series) -> list[str]:
    """Return up to three human-readable reasons explaining a student's risk row.

    Rules are checked in a fixed order (assessment, digital engagement, finance,
    campus, academic history) and the first three that fire are returned. When
    none fires, a single generic advisor-review reason is returned.
    """
    # Each rule pairs a trigger with a formatter; the message is only built when the trigger fires.
    rules = (
        (
            lambda r: r["missing_assignments_to_date"] >= 1 or r["submission_completion_rate_to_date"] < 0.85,
            lambda r: (
                f"assessment: {int(r['missing_assignments_to_date'])} missed assignments, "
                f"{r['submission_completion_rate_to_date']:.0%} submitted"
            ),
        ),
        (
            lambda r: r["days_since_last_lms_login"] >= 4 or r["lms_logins_14d"] <= 12,
            lambda r: f"digital engagement: {int(r['days_since_last_lms_login'])} days since last LMS login",
        ),
        (
            lambda r: r["payment_overdue_days"] > 0 or r["outstanding_balance_nok"] > 5000,
            lambda r: (
                f"finance: NOK {r['outstanding_balance_nok']:.0f} balance, "
                f"{int(r['payment_overdue_days'])} overdue days"
            ),
        ),
        (
            # Campus presence is not used as a signal for online-heavy students.
            lambda r: r["preferred_study_mode"] != "online-heavy" and r["campus_drop_14d_vs_start"] >= 3,
            lambda r: f"campus: {int(r['campus_drop_14d_vs_start'])} fewer active campus days vs term start",
        ),
        (
            lambda r: r["prior_gpa"] < 2.5,
            lambda r: f"academic history: prior GPA {r['prior_gpa']:.2f}",
        ),
    )
    triggered = [describe(row) for applies, describe in rules if applies(row)]
    if not triggered:
        triggered.append("advisor review: multiple moderate deviations, no single dominant driver")
    return triggered[:3]


def feature_hash(row: pd.Series, columns: list[str]) -> str:
    """Return an 18-hex-character fingerprint of the selected feature values.

    Values are serialised as key-sorted JSON and hashed with SHA-256, so the
    result does not depend on the order of *columns*. Timestamps are rendered
    in ISO format and NumPy scalars are converted to plain Python numbers; any
    other non-JSON value falls back to ``str``.
    """

    def to_plain(value):
        if isinstance(value, pd.Timestamp):
            return value.isoformat()
        if isinstance(value, (np.integer, np.floating)):
            return value.item()
        return value

    payload = {col: to_plain(row[col]) for col in columns}
    encoded = json.dumps(payload, sort_keys=True, default=str).encode("utf-8")
    return hashlib.sha256(encoded).hexdigest()[:18]


def fairness_report(scored: pd.DataFrame, threshold: float) -> pd.DataFrame:
    """Summarise flag rates and error rates per demographic / organisational group.

    A student is flagged when ``risk_score >= threshold``. For each value of
    each audited column the report holds the group size, label rate, mean risk
    score, flag rate, recall and false-positive rate (all rounded to 4 places).
    Missing group values form their own group.
    """
    audited_columns = (
        "gender",
        "international_student",
        "first_generation_student",
        "faculty_asof",
        "preferred_study_mode",
    )
    is_flagged = scored["risk_score"].ge(threshold)
    is_dropout = scored["dropout_label_next_term"].astype(bool)

    def rounded(value) -> float:
        return round(float(value), 4)

    records = []
    for dimension in audited_columns:
        for level, members in scored.groupby(dimension, dropna=False):
            member_flags = is_flagged.loc[members.index]
            member_labels = is_dropout.loc[members.index]
            true_alerts = (member_flags & member_labels).sum()
            false_alerts = (member_flags & ~member_labels).sum()
            # max(1, ...) keeps the ratios at 0 when a group has no positives / negatives.
            records.append(
                {
                    "group": dimension,
                    "value": str(level),
                    "students": len(members),
                    "label_rate_demo_only": rounded(member_labels.mean()),
                    "avg_risk_score": rounded(members["risk_score"].mean()),
                    "flag_rate": rounded(member_flags.mean()),
                    "recall_if_labels_later_confirm": rounded(true_alerts / max(1, member_labels.sum())),
                    "false_positive_rate_if_labels_later_confirm": rounded(
                        false_alerts / max(1, (~member_labels).sum())
                    ),
                }
            )
    return pd.DataFrame(records)


def data_dictionary() -> pd.DataFrame:
    """Return the data dictionary as a frame with columns table/column/type/description.

    Entries are grouped per table below and flattened in declaration order.
    """
    # table name -> [(column, type, description), ...]
    entries_by_table = {
        "sis_students": [
            ("canonical_student_id", "string", "Pseudonymous university-wide student key."),
            ("faculty/programme", "string", "Point-in-time academic ownership from SIS."),
            (
                "gender, age_band, international_student",
                "string/bool",
                "Sensitive or demographic fields used for fairness audit, not default model input.",
            ),
        ],
        "identity_map": [
            (
                "source_system/source_person_id",
                "string",
                "Maps SIS/LMS/ERP/CAMPUS identifiers to canonical student keys.",
            ),
        ],
        "status_history": [
            ("valid_from/valid_to", "timestamp", "Type-2 history for programme/status changes."),
        ],
        "enrollment_terms": [
            ("prior_gpa", "float", "Academic context available before the term starts."),
            (
                "dropout_label_next_term",
                "bool",
                "Training label observed only after the term; never available at scoring time.",
            ),
        ],
        "lms_activity_daily": [
            ("login_count/course_views/video_minutes", "integer", "Daily digital engagement events."),
            ("assignment_due/submitted/late", "integer", "Assessment progress signals."),
        ],
        "erp_finance_snapshots": [
            (
                "outstanding_balance_nok/payment_overdue_days",
                "numeric",
                "Financial friction snapshot from ERP.",
            ),
        ],
        "campus_activity_daily": [
            (
                "building_entry_count/wifi_minutes/library_entry_count",
                "integer",
                "Physical campus engagement aggregated by day.",
            ),
        ],
        "feature_student_week": [
            ("as_of_date", "date", "Point-in-time feature timestamp used for leakage control."),
        ],
        "risk_predictions": [
            (
                "risk_score/risk_band/top_reasons",
                "numeric/string",
                "Advisor-facing prediction output with explanation.",
            ),
        ],
        "access_audit": [
            (
                "viewer/action/purpose/timestamp",
                "string/timestamp",
                "Evidence for who accessed predictions and why.",
            ),
        ],
    }
    rows = [
        (table, column, dtype, description)
        for table, entries in entries_by_table.items()
        for column, dtype, description in entries
    ]
    return pd.DataFrame(rows, columns=["table", "column", "type", "description"])


def write_outputs(sources: SourceData, scored: pd.DataFrame, validation: pd.DataFrame, fairness: pd.DataFrame, out_dir: Path) -> Path:
    """Write every demo artefact below ``out_dir`` and return the dashboard path.

    Args:
        sources: Raw source tables; each is dumped to ``source_samples/``.
        scored: Scored feature rows, written in full and as the advisor list.
        validation: Validation metrics table, written as CSV.
        fairness: Fairness report table, written as CSV and used for plots.
        out_dir: Target directory; created (with parents) when missing.

    Returns:
        Path of the HTML dashboard produced by ``write_dashboard``.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    source_dir = out_dir / "source_samples"
    source_dir.mkdir(exist_ok=True)

    # Raw source tables, keyed by the file name they are exported under.
    source_exports = {
        "sis_students.csv": sources.sis_students,
        "identity_map.csv": sources.identity_map,
        "status_history.csv": sources.status_history,
        "enrollment_terms.csv": sources.enrollment_terms,
        "lms_activity_daily.csv": sources.lms_activity_daily,
        "erp_finance_snapshots.csv": sources.erp_finance_snapshots,
        "campus_activity_daily.csv": sources.campus_activity_daily,
    }
    for filename, table in source_exports.items():
        table.to_csv(source_dir / filename, index=False)

    scored.to_csv(out_dir / "feature_student_week_and_predictions.csv", index=False)

    # Advisor list: a narrow projection of the scored rows, ordered red ->
    # amber -> green and, within a band, from highest to lowest risk score.
    advisor_view = scored[
        [
            "prediction_id",
            "canonical_student_id",
            "term_id",
            "as_of_date",
            "advisor_id",
            "faculty_asof",
            "programme_asof",
            "risk_score",
            "risk_band",
            "top_reasons",
            "model_version",
            "feature_snapshot_hash",
        ]
    ].copy()
    band_priority = {"red": 0, "amber": 1, "green": 2}
    advisor_view["risk_band_order"] = advisor_view["risk_band"].map(band_priority)
    advisor_view = advisor_view.sort_values(["risk_band_order", "risk_score"], ascending=[True, False])
    advisor_view = advisor_view.drop(columns=["risk_band_order"])
    advisor_view.to_csv(out_dir / "advisor_risk_list.csv", index=False)

    validation.to_csv(out_dir / "model_validation_metrics.csv", index=False)
    fairness.to_csv(out_dir / "fairness_report.csv", index=False)
    data_dictionary().to_csv(out_dir / "data_dictionary.csv", index=False)
    access_audit(scored).to_csv(out_dir / "access_audit_sample.csv", index=False)

    plot_outputs(scored, fairness, out_dir)
    return write_dashboard(scored, validation, fairness, out_dir)


def access_audit(scored: pd.DataFrame) -> pd.DataFrame:
    """Build a sample access-audit log for the highest-risk predictions.

    One "view" event is generated for each of the (at most) 80 rows of
    ``scored`` with the largest ``risk_score``, numbered from 1 in that order.

    Args:
        scored: Scored rows; the columns ``risk_score``, ``as_of_date``,
            ``advisor_id``, ``prediction_id``, ``canonical_student_id`` and
            ``model_version`` are read.

    Returns:
        A frame with one audit event per selected prediction.
    """
    top_viewed = scored.sort_values("risk_score", ascending=False).head(80).copy()

    def audit_event(sequence: int, prediction: pd.Series) -> dict:
        # Timestamp is the row's as-of date at 09:00 plus a minute offset that
        # cycles through 0-49 with the event sequence number.
        viewed_at = prediction["as_of_date"] + pd.Timedelta(hours=9, minutes=sequence % 50)
        return {
            "audit_event_id": f"AUD-{sequence:05d}",
            "event_timestamp": viewed_at,
            "viewer_user_id": prediction["advisor_id"],
            "viewer_role": "academic_advisor",
            "action": "view_student_risk_prediction",
            "prediction_id": prediction["prediction_id"],
            "canonical_student_id": prediction["canonical_student_id"],
            "purpose": "student_support_intervention",
            "legal_basis": "public_task_with_documented_DPIA",
            "fields_returned": "risk_band, risk_score, top_reasons, programme, advisor_case_context",
            "model_version": prediction["model_version"],
        }

    events = [
        audit_event(sequence, prediction)
        for sequence, (_, prediction) in enumerate(top_viewed.iterrows(), start=1)
    ]
    return pd.DataFrame(events)


def plot_outputs(scored: pd.DataFrame, fairness: pd.DataFrame, out_dir: Path) -> None:
    """Render the two static charts that accompany the dashboard.

    Writes ``risk_distribution.png`` (overlaid risk-score histograms, one per
    risk band) and ``fairness_flag_rates.png`` (horizontal bars of
    ``flag_rate`` for selected fairness groups) into ``out_dir``.

    Args:
        scored: Scored rows; ``risk_band`` and ``risk_score`` are read.
        fairness: Fairness report; ``group``, ``value`` and ``flag_rate`` are read.
        out_dir: Existing directory that receives the PNG files.
    """

    def save_and_close(figure, filename: str) -> None:
        # Shared finishing steps for both charts.
        figure.tight_layout()
        figure.savefig(out_dir / filename, dpi=160)
        plt.close(figure)

    # Chart 1: score histogram per band, drawn green -> amber -> red.
    band_colours = {"green": "#3a7d44", "amber": "#b98517", "red": "#b3261e"}
    hist_fig, hist_ax = plt.subplots(figsize=(9, 4.8))
    for band, colour in band_colours.items():
        in_band = scored["risk_band"] == band
        hist_ax.hist(scored.loc[in_band, "risk_score"], bins=24, alpha=0.72, label=band, color=colour)
    hist_ax.set_title("Risk score distribution by advisor band")
    hist_ax.set_xlabel("Predicted dropout risk")
    hist_ax.set_ylabel("Students")
    hist_ax.legend()
    save_and_close(hist_fig, "risk_distribution.png")

    # Chart 2: flag rates for a fixed subset of fairness groups.
    charted_groups = ["preferred_study_mode", "international_student", "first_generation_student"]
    selected = fairness.loc[fairness["group"].isin(charted_groups)].sort_values(["group", "value"])
    labels = selected["group"].str.replace("_", " ") + ": " + selected["value"]
    highest_rate = float(selected["flag_rate"].max())

    bar_fig, bar_ax = plt.subplots(figsize=(9, 4.8))
    bar_ax.barh(labels, selected["flag_rate"], color="#386fa4")
    bar_ax.set_title("Flag rate checks across selected groups")
    bar_ax.set_xlabel("Flag rate at advisor capacity")
    # Keep the axis at least 0.25 wide, with a little headroom past the largest bar.
    bar_ax.set_xlim(0, max(0.25, highest_rate + 0.03))
    save_and_close(bar_fig, "fairness_flag_rates.png")


def dashboard_payload(scored: pd.DataFrame, validation: pd.DataFrame, fairness: pd.DataFrame) -> dict:
    advisor_cols = [
        "prediction_id",
        "canonical_student_id",
        "advisor_id",
        "faculty_asof",
        "programme_asof",
        "preferred_study_mode",
        "academic_standing",
        "student_status_asof",
        "aid_status",
        "risk_score",
        "risk_band",
        "top_reasons",
        "prior_gpa",
        "credits_registered",
        "credits_completed_previous_term",
        "lms_logins_14d",
        "lms_course_views_14d",
        "video_minutes_14d",
        "forum_posts_14d",
        "quiz_attempts_14d",
        "days_since_last_lms_login",
        "missing_assignments_to_date",
        "late_submissions_to_date",
        "submission_completion_rate_to_date",
        "payment_overdue_days",
        "outstanding_balance_nok",
        "campus_days_14d",
        "building_entries_14d",
        "library_entries_30d",
        "campus_drop_14d_vs_start",
        "feature_snapshot_hash",
    ]
    advisor_rows = scored[advisor_cols].copy()
    advisor_rows["risk_score"] = advisor_rows["risk_score"].round(4)
    advisor_rows["prior_gpa"] = advisor_rows["prior_gpa"].round(2)
    advisor_rows["submission_completion_rate_to_date"] = advisor_rows["submission_completion_rate_to_date"].round(4)
    advisor_rows["outstanding_balance_nok"] = advisor_rows["outstanding_balance_nok"].round(0).astype(int)
    for column in [
        "credits_registered",
        "credits_completed_previous_term",
        "lms_logins_14d",
        "lms_course_views_14d",
        "video_minutes_14d",
        "forum_posts_14d",
        "quiz_attempts_14d",
        "days_since_last_lms_login",
        "missing_assignments_to_date",
        "late_submissions_to_date",
        "payment_overdue_days",
        "campus_days_14d",
        "building_entries_14d",
        "library_entries_30d",
        "campus_drop_14d_vs_start",
    ]:
        advisor_rows[column] = advisor_rows[column].round(0).astype(int)
    advisor_rows["risk_band_order"] = advisor_rows["risk_band"].map({"red": 0, "amber": 1, "green": 2})
    advisor_rows = advisor_rows.sort_values(["risk_band_order", "risk_score"], ascending=[True, False]).drop(
        columns=["risk_band_order"]
    )

    faculty = (
        scored.groupby("faculty_asof")
        .agg(
            students=("canonical_student_id", "count"),
            avg_risk=("risk_score", "mean"),
            red=("risk_band", lambda values: int((values == "red").sum())),
            amber=("risk_band", lambda values: int((values == "amber").sum())),
        )
        .reset_index()
        .sort_values("avg_risk", ascending=False)
    )
    faculty["avg_risk"] = faculty["avg_risk"].round(4)

    band_counts = scored["risk_band"].value_counts().reindex(["red", "amber", "green"], fill_value=0)
    validation_rows = validation.round(4).to_dict(orient="records")
    fairness_rows = fairness.to_dict(orient="records")

    return {
        "generated_at": pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S"),
        "as_of_date": str(scored["as_of_date"].iloc[0].date()),
        "model_version": str(scored["model_version"].iloc[0]),
        "students_scored": int(len(scored)),
        "band_counts": {band: int(count) for band, count in band_counts.items()},
        "average_risk": round(float(scored["risk_score"].mean()), 4),
        "validation": validation_rows,
        "faculty": faculty.to_dict(orient="records"),
        "fairness": fairness_rows,
        "students": advisor_rows.to_dict(orient="records"),
    }


def write_dashboard(scored: pd.DataFrame, validation: pd.DataFrame, fairness: pd.DataFrame, out_dir: Path) -> Path:
    """Render the self-contained HTML dashboard and write it to ``out_dir``.

    The payload from ``dashboard_payload`` is serialised to JSON and
    substituted for the ``__DASHBOARD_DATA__`` placeholder in
    ``DASHBOARD_TEMPLATE``.

    Args:
        scored: Scored student rows.
        validation: Validation metrics table.
        fairness: Fairness report table.
        out_dir: Existing directory that receives ``demo_dashboard.html``.

    Returns:
        Path of the written HTML file.
    """
    payload = dashboard_payload(scored, validation, fairness)
    # default=str stringifies any value the json module cannot encode natively.
    data_json = json.dumps(payload, default=str)
    # The JSON is pasted into an inline <script> element. Escape "<" so that a
    # data string such as "</script>" or "<!--" cannot terminate or corrupt the
    # script block; JavaScript decodes "\u003c" back to "<", so the embedded
    # values are unchanged. "<" can only occur inside JSON string values.
    data_json = data_json.replace("<", "\\u003c")
    html = DASHBOARD_TEMPLATE.replace("__DASHBOARD_DATA__", data_json)
    dashboard_path = out_dir / "demo_dashboard.html"
    dashboard_path.write_text(html, encoding="utf-8")
    return dashboard_path


DASHBOARD_TEMPLATE = r"""<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Student Outcome Intelligence Demo</title>
  <style>
    :root {
      --ink: #17202a;
      --muted: #667586;
      --bg: #f4f7fa;
      --panel: #ffffff;
      --line: #d7dfe8;
      --navy: #143048;
      --green: #217463;
      --amber: #b36b16;
      --red: #b3261e;
      --blue: #1f5c88;
    }
    * { box-sizing: border-box; }
    body {
      margin: 0;
      background: var(--bg);
      color: var(--ink);
      font-family: "Segoe UI", Arial, Helvetica, sans-serif;
      letter-spacing: 0;
    }
    .topbar {
      background:
        linear-gradient(90deg, rgba(255,255,255,.08) 0, rgba(255,255,255,.08) 1px, transparent 1px),
        linear-gradient(180deg, rgba(255,255,255,.06) 0, rgba(255,255,255,.06) 1px, transparent 1px),
        var(--navy);
      background-size: 54px 54px;
      color: #f7fbff;
      padding: 28px 34px 32px;
      border-top: 8px solid var(--green);
    }
    .topbar h1 {
      margin: 0;
      font-size: clamp(30px, 4vw, 54px);
      line-height: 1.04;
      max-width: 980px;
    }
    .subtitle {
      color: #d9e5ee;
      margin-top: 12px;
      max-width: 980px;
      font-size: 18px;
      line-height: 1.45;
    }
    .meta {
      display: flex;
      flex-wrap: wrap;
      gap: 10px;
      margin-top: 18px;
    }
    .pill {
      border: 1px solid rgba(255,255,255,.22);
      background: rgba(255,255,255,.08);
      color: #eef6fb;
      border-radius: 999px;
      padding: 8px 12px;
      font-size: 13px;
      font-weight: 700;
    }
    main {
      max-width: 1420px;
      margin: 0 auto;
      padding: 24px 26px 42px;
    }
    .cards {
      display: grid;
      grid-template-columns: repeat(4, minmax(0, 1fr));
      gap: 16px;
      margin-top: -42px;
      margin-bottom: 18px;
    }
    .card, .panel {
      background: var(--panel);
      border: 1px solid var(--line);
      border-radius: 8px;
      box-shadow: 0 18px 36px rgba(20, 48, 72, .08);
    }
    .card {
      padding: 20px;
      min-height: 120px;
      border-left: 5px solid var(--green);
    }
    .card.red { border-left-color: var(--red); }
    .card.amber { border-left-color: var(--amber); }
    .card.blue { border-left-color: var(--blue); }
    .label {
      color: var(--muted);
      font-size: 12px;
      text-transform: uppercase;
      font-weight: 800;
      letter-spacing: .8px;
      margin-bottom: 9px;
    }
    .value {
      font-size: 36px;
      line-height: 1;
      font-weight: 850;
      color: var(--navy);
      font-variant-numeric: tabular-nums;
    }
    .hint {
      color: var(--muted);
      font-size: 14px;
      line-height: 1.35;
      margin-top: 10px;
    }
    .layout {
      display: grid;
      grid-template-columns: minmax(0, 1.7fr) minmax(320px, .9fr);
      gap: 18px;
    }
    .panel {
      overflow: hidden;
    }
    .panel-header {
      display: flex;
      align-items: center;
      justify-content: space-between;
      gap: 12px;
      padding: 18px 20px;
      border-bottom: 1px solid var(--line);
      background: #fbfcfd;
    }
    .panel-header h2 {
      margin: 0;
      font-size: 20px;
      color: var(--navy);
    }
    .queue-title {
      display: grid;
      gap: 6px;
    }
    .queue-status {
      color: var(--muted);
      font-size: 13px;
      font-weight: 650;
    }
    .tools {
      display: flex;
      gap: 8px;
      flex-wrap: wrap;
      align-items: center;
    }
    .queue-tools {
      justify-content: flex-end;
      max-width: 860px;
    }
    button {
      font: inherit;
    }
    input, select {
      border: 1px solid var(--line);
      background: #fff;
      border-radius: 7px;
      padding: 9px 11px;
      font-size: 14px;
      color: var(--ink);
      min-height: 38px;
    }
    input { min-width: 250px; }
    select { min-width: 154px; }
    .sort-select { min-width: 214px; }
    .action-button {
      min-height: 38px;
      border: 1px solid var(--line);
      background: #eef4f8;
      color: var(--navy);
      border-radius: 7px;
      padding: 8px 12px;
      font-weight: 850;
      cursor: pointer;
    }
    .action-button:hover { background: #dceaf2; }
    .tabs {
      display: inline-flex;
      border: 1px solid var(--line);
      border-radius: 999px;
      overflow: hidden;
      background: #fff;
    }
    .tabs button {
      border: 0;
      background: transparent;
      padding: 9px 13px;
      font-weight: 800;
      cursor: pointer;
      color: var(--muted);
    }
    .tabs button.active {
      background: var(--navy);
      color: #fff;
    }
    table {
      width: 100%;
      border-collapse: collapse;
    }
    th, td {
      text-align: left;
      padding: 12px 14px;
      border-bottom: 1px solid var(--line);
      vertical-align: top;
      font-size: 14px;
    }
    th {
      background: var(--navy);
      color: #fff;
      position: sticky;
      top: 0;
      z-index: 1;
      text-transform: uppercase;
      letter-spacing: .5px;
      font-size: 12px;
    }
    .sort-button {
      display: inline-flex;
      align-items: center;
      gap: 6px;
      border: 0;
      padding: 0;
      margin: 0;
      background: transparent;
      color: inherit;
      text-transform: inherit;
      letter-spacing: inherit;
      font-size: inherit;
      font-weight: inherit;
      cursor: pointer;
    }
    .sort-button .sort-icon {
      display: inline-flex;
      align-items: center;
      justify-content: center;
      width: 14px;
      height: 14px;
      border-radius: 999px;
      background: rgba(255,255,255,.18);
      color: #fff;
      font-size: 10px;
      line-height: 1;
    }
    .sort-button:not(.active) .sort-icon {
      background: transparent;
      color: rgba(255,255,255,.58);
    }
    tbody tr:hover td { background: #f7fafc; }
    tbody tr[data-student-id] {
      cursor: pointer;
    }
    tbody tr.selected td { background: #edf6fb; }
    tbody tr.selected td:first-child { box-shadow: inset 3px 0 0 var(--blue); }
    .table-wrap {
      max-height: 640px;
      overflow: auto;
    }
    .band {
      display: inline-flex;
      align-items: center;
      justify-content: center;
      min-width: 64px;
      border-radius: 999px;
      color: #fff;
      padding: 6px 9px;
      font-size: 12px;
      font-weight: 850;
      text-transform: uppercase;
    }
    .band.red { background: var(--red); }
    .band.amber { background: var(--amber); }
    .band.green { background: var(--green); }
    .score {
      font-weight: 850;
      color: var(--navy);
      font-variant-numeric: tabular-nums;
    }
    .reasons {
      color: var(--ink);
      line-height: 1.35;
      max-width: 380px;
    }
    .reason-preview {
      display: -webkit-box;
      -webkit-line-clamp: 4;
      -webkit-box-orient: vertical;
      overflow: hidden;
    }
    .signal-list {
      display: grid;
      gap: 5px;
      min-width: 126px;
      color: var(--muted);
      font-size: 12px;
      line-height: 1.2;
    }
    .signal-list strong {
      color: var(--navy);
      font-variant-numeric: tabular-nums;
    }
    .side-stack {
      display: grid;
      gap: 18px;
      align-content: start;
    }
    .detail {
      padding: 18px 20px 20px;
    }
    .detail-head {
      display: flex;
      align-items: flex-start;
      justify-content: space-between;
      gap: 12px;
      margin-bottom: 14px;
    }
    .detail-head strong {
      display: block;
      color: var(--navy);
      font-size: 18px;
    }
    .detail-meta {
      color: var(--muted);
      font-size: 13px;
      line-height: 1.4;
      margin-top: 3px;
    }
    .detail-list {
      display: grid;
      grid-template-columns: repeat(2, minmax(0, 1fr));
      gap: 0 16px;
      margin-top: 12px;
    }
    .detail-item {
      border-top: 1px solid var(--line);
      padding: 10px 0;
      min-width: 0;
    }
    .detail-item .label {
      letter-spacing: .4px;
      margin-bottom: 5px;
    }
    .detail-item strong {
      color: var(--navy);
      font-variant-numeric: tabular-nums;
    }
    .reason-box {
      border-top: 1px solid var(--line);
      margin-top: 6px;
      padding-top: 12px;
      color: var(--ink);
      font-size: 13px;
      line-height: 1.45;
    }
    .bars {
      padding: 18px 20px 20px;
      display: grid;
      gap: 12px;
    }
    .bar-row {
      display: grid;
      grid-template-columns: 150px 1fr 54px;
      gap: 10px;
      align-items: center;
      font-size: 13px;
    }
    .bar-track {
      height: 10px;
      background: #e8eef4;
      border-radius: 999px;
      overflow: hidden;
    }
    .bar-fill {
      height: 100%;
      background: var(--blue);
      border-radius: 999px;
    }
    .chart-grid {
      display: grid;
      gap: 12px;
      padding: 18px 20px 20px;
    }
    .chart-grid img {
      width: 100%;
      border: 1px solid var(--line);
      border-radius: 7px;
      background: #fff;
    }
    .metric-table td:first-child {
      color: var(--muted);
      width: 55%;
    }
    .empty {
      padding: 26px;
      color: var(--muted);
      text-align: center;
    }
    .footnote {
      margin-top: 18px;
      color: var(--muted);
      font-size: 13px;
      line-height: 1.45;
    }
    /* Case-study visual layer: match the written solution's calm textbook style. */
    :root {
      --ink: #1f2925;
      --muted: #5b6962;
      --bg: #f7f2e8;
      --panel: #ffffff;
      --line: #ded3c3;
      --navy: #123846;
      --green: #0d6e58;
      --amber: #b97722;
      --red: #a9463f;
      --blue: #2e7489;
      --soft: #edf4ed;
      --paper-grid: rgba(47,111,91,.035);
    }
    body {
      background:
        linear-gradient(90deg, rgba(47,111,91,.035) 0, rgba(47,111,91,.035) 1px, transparent 1px),
        linear-gradient(180deg, rgba(47,111,91,.03) 0, rgba(47,111,91,.03) 1px, transparent 1px),
        var(--bg);
      background-size: 34px 34px;
    }
    .topbar {
      color: var(--ink);
      background:
        linear-gradient(135deg, rgba(47,111,91,.12) 0 32%, transparent 32%),
        linear-gradient(180deg, #ffffff 0%, var(--bg) 100%);
      border-top: 8px solid var(--green);
      border-bottom: 1px solid var(--line);
      padding: 30px 34px 56px;
    }
    .topbar h1 {
      color: var(--navy);
      font-family: Georgia, "Times New Roman", serif;
      font-size: clamp(34px, 4vw, 56px);
      letter-spacing: 0;
    }
    .subtitle {
      color: var(--muted);
      max-width: 940px;
      font-size: 17px;
    }
    .pill {
      border-color: var(--line);
      background: rgba(255,253,247,.84);
      color: var(--navy);
      border-radius: 6px;
      font-weight: 800;
    }
    main {
      padding-top: 24px;
    }
    .cards {
      gap: 14px;
      margin-top: -46px;
      margin-bottom: 18px;
    }
    .card,
    .panel {
      background: var(--panel);
      border-color: var(--line);
      border-radius: 6px;
      box-shadow: none;
    }
    .card {
      border-left: 1px solid var(--line);
      border-top: 4px solid var(--green);
      min-height: 124px;
    }
    .card.red { border-left-color: var(--line); border-top-color: var(--red); }
    .card.amber { border-left-color: var(--line); border-top-color: var(--amber); }
    .card.blue { border-left-color: var(--line); border-top-color: var(--blue); }
    .label {
      color: var(--green);
      letter-spacing: .45px;
    }
    .value {
      color: var(--navy);
      font-family: Georgia, "Times New Roman", serif;
      font-size: 34px;
    }
    .hint {
      color: var(--muted);
    }
    .panel {
      border-top: 4px solid var(--green);
    }
    .side-stack .panel:nth-child(2),
    section.panel {
      border-top-color: var(--blue);
    }
    .side-stack .panel:nth-child(3) {
      border-top-color: var(--amber);
    }
    .side-stack .panel:nth-child(4) {
      border-top-color: var(--green);
    }
    .panel-header {
      background:
        linear-gradient(90deg, var(--paper-grid) 0, var(--paper-grid) 1px, transparent 1px),
        #ffffff;
      background-size: 28px 28px;
      border-bottom-color: var(--line);
    }
    .panel-header h2 {
      color: var(--navy);
      font-family: Georgia, "Times New Roman", serif;
      font-size: 19px;
      letter-spacing: 0;
    }
    input,
    select {
      background: #fff;
      border-color: var(--line);
      border-radius: 5px;
      color: var(--ink);
    }
    input:focus,
    select:focus {
      border-color: var(--green);
      outline: 2px solid rgba(47,111,91,.14);
      outline-offset: 1px;
    }
    .action-button,
    .tabs {
      border-color: var(--line);
      border-radius: 5px;
      background: #ffffff;
      box-shadow: none;
    }
    .action-button:hover {
      background: var(--soft);
    }
    .tabs button {
      color: var(--muted);
    }
    .tabs button.active {
      background: var(--navy);
      color: #fff;
    }
    th {
      background: var(--navy);
      color: #fff;
      letter-spacing: .45px;
    }
    th,
    td {
      border-bottom-color: var(--line);
    }
    tbody tr:hover td {
      background: #ffffff;
    }
    tbody tr.selected td {
      background: #f5efe2;
    }
    tbody tr.selected td:first-child {
      box-shadow: inset 4px 0 0 var(--green);
    }
    .band {
      border-radius: 5px;
      min-width: 62px;
    }
    .band.red { background: var(--red); }
    .band.amber { background: var(--amber); }
    .band.green { background: var(--green); }
    .score,
    .detail-head strong,
    .detail-item strong,
    .signal-list strong {
      color: var(--navy);
    }
    .detail,
    .bars,
    .chart-grid,
    .footnote {
      background: var(--panel);
    }
    .detail-item,
    .reason-box {
      border-top-color: var(--line);
    }
    .bar-track {
      background: #efe8dc;
      border-radius: 4px;
    }
    .bar-fill {
      background: var(--blue);
      border-radius: 4px;
    }
    .chart-grid img {
      border-color: var(--line);
      border-radius: 6px;
      background: #fff;
    }
    .metric-table td:first-child {
      color: var(--muted);
    }
    .empty {
      color: var(--muted);
      background: var(--panel);
    }
    .footnote {
      border: 1px solid var(--line);
      border-radius: 6px;
      padding: 14px 16px;
    }
    @media (max-width: 1050px) {
      .cards { grid-template-columns: repeat(2, minmax(0, 1fr)); }
      .layout { grid-template-columns: 1fr; }
      .table-wrap { max-height: none; }
      .queue-tools { justify-content: flex-start; max-width: none; }
    }
    @media (max-width: 680px) {
      .cards { grid-template-columns: 1fr; margin-top: 0; }
      main { padding: 18px 14px 32px; }
      .topbar { padding: 24px 18px; }
      input, select, .sort-select, .action-button { width: 100%; min-width: 100%; }
      .tabs { width: 100%; }
      .tabs button { flex: 1; }
      .panel-header { align-items: flex-start; flex-direction: column; }
      .bar-row { grid-template-columns: 1fr; }
      .detail-list { grid-template-columns: 1fr; }
    }

    /* ============================================================
       MINIMALIST WHITEPAPER OVERRIDE — match the case page
       ============================================================ */

    body { background: #ffffff; color: #1a2332; line-height: 1.55; }
    body::before, body::after { display: none !important; }

    .topbar,
    .panel,
    .card,
    .summary-card,
    .summary-tile,
    .metric-card,
    .stat-card,
    .filter-bar,
    .tabs,
    main {
      background: #ffffff !important;
      background-image: none !important;
      box-shadow: none !important;
    }
    .topbar { border-bottom: 1px solid #e6e8ec; }
    .panel,
    .card,
    .summary-card,
    .summary-tile,
    .metric-card {
      border: 1px solid #e6e8ec;
      border-radius: 4px;
    }

    h1, h2, h3 {
      color: #1a2332;
      font-family: "Segoe UI", "Inter", system-ui, sans-serif;
      font-weight: 600;
      letter-spacing: -.005em;
    }
    h1 { font-weight: 700; }

    .kicker, .label, .stat-label, .summary-tile-label {
      background: transparent !important;
      border: 0 !important;
      padding: 0 !important;
      color: #5b6675 !important;
      font-weight: 600;
      font-size: 11px;
      letter-spacing: 1.5px;
      text-transform: uppercase;
    }

    .button,
    .tabs button,
    .filter-bar button,
    .filter-bar select,
    .filter-bar input {
      background: #ffffff;
      color: #1a2332;
      border: 1px solid #e6e8ec;
      box-shadow: none !important;
      border-radius: 4px;
      font-weight: 500;
      font-size: 13px;
    }
    .tabs button.active,
    .button.primary {
      background: #1a2332;
      color: #ffffff;
      border-color: #1a2332;
      font-weight: 600;
    }

    table { border-collapse: collapse; width: 100%; }
    th, td {
      border-bottom: 1px solid #e6e8ec;
      padding: 10px 12px;
      font-size: 13px;
      text-align: left;
      vertical-align: top;
    }
    thead th {
      background: #f5f7f9;
      border-bottom: 2px solid #1a2332;
      font-weight: 600;
      font-size: 11px;
      letter-spacing: 1px;
      text-transform: uppercase;
    }
    tbody tr:hover { background: #f5f7f9; }

    code {
      background: #f5f7f9;
      border: 1px solid #e6e8ec;
      border-radius: 3px;
      padding: 1px 5px;
      font-size: 12.5px;
      color: #1a2332;
    }
  </style>
</head>
<body>
  <header class="topbar">
    <h1>Student Outcome Intelligence Demo</h1>
    <div class="subtitle">Interactive front end for the generated dropout-risk prototype: advisor risk list, model validation, faculty patterns, fairness checks, and audit-ready explanation fields.</div>
    <div class="meta">
      <span class="pill">As of <span id="asOf"></span></span>
      <span class="pill">Model <span id="modelVersion"></span></span>
      <span class="pill">Generated <span id="generatedAt"></span></span>
    </div>
  </header>

  <main>
    <section class="cards">
      <div class="card blue">
        <div class="label">Students scored</div>
        <div class="value" id="studentsScored">0</div>
        <div class="hint">Active students in the Spring 2026 dummy scoring cohort.</div>
      </div>
      <div class="card red">
        <div class="label">Red cases</div>
        <div class="value" id="redCount">0</div>
        <div class="hint">Highest-priority advisor queue.</div>
      </div>
      <div class="card amber">
        <div class="label">Amber cases</div>
        <div class="value" id="amberCount">0</div>
        <div class="hint">Students worth monitoring or light-touch outreach.</div>
      </div>
      <div class="card">
        <div class="label">Holdout ROC-AUC</div>
        <div class="value" id="aucValue">0.00</div>
        <div class="hint">Demo validation using point-in-time Autumn 2025 snapshots.</div>
      </div>
    </section>

    <section class="layout">
      <div class="panel">
        <div class="panel-header">
          <div class="queue-title">
            <h2>Advisor Risk Queue</h2>
            <div class="queue-status"><span id="queueCount">0 matching students</span> · <span id="queueLimit">Showing top 250</span></div>
          </div>
          <div class="tools queue-tools">
            <input id="search" type="search" placeholder="Search student, advisor, reason">
            <select id="facultyFilter" aria-label="Filter by faculty">
              <option value="all">All faculties</option>
            </select>
            <select id="modeFilter" aria-label="Filter by study mode">
              <option value="all">All study modes</option>
            </select>
            <select id="sortSelect" class="sort-select" aria-label="Sort advisor queue">
              <option value="band">Sort: band priority</option>
              <option value="student_asc">Sort: student ID</option>
              <option value="risk_desc">Sort: highest risk</option>
              <option value="risk_asc">Sort: lowest risk</option>
              <option value="gpa_asc">Sort: lowest GPA</option>
              <option value="missing_desc">Sort: missed assignments</option>
              <option value="overdue_desc">Sort: overdue days</option>
              <option value="balance_desc">Sort: outstanding balance</option>
              <option value="lms_asc">Sort: lowest LMS logins</option>
              <option value="faculty_asc">Sort: faculty</option>
              <option value="advisor_asc">Sort: advisor</option>
            </select>
            <button id="resetFilters" class="action-button" type="button">Reset</button>
            <div class="tabs" aria-label="Risk band filter">
              <button type="button" class="active" data-band="all">All</button>
              <button type="button" data-band="red">Red</button>
              <button type="button" data-band="amber">Amber</button>
              <button type="button" data-band="green">Green</button>
            </div>
          </div>
        </div>
        <div class="table-wrap">
          <table>
            <thead>
              <tr>
                <th><button type="button" class="sort-button active" data-sort="band">Band <span class="sort-icon">v</span></button></th>
                <th><button type="button" class="sort-button" data-sort="student_asc">Student <span class="sort-icon"></span></button></th>
                <th><button type="button" class="sort-button" data-sort="faculty_asc">Faculty / Programme <span class="sort-icon"></span></button></th>
                <th><button type="button" class="sort-button" data-sort="risk_desc" data-alt-sort="risk_asc">Score <span class="sort-icon"></span></button></th>
                <th><button type="button" class="sort-button" data-sort="missing_desc">Signals <span class="sort-icon"></span></button></th>
                <th>Reasons</th>
                <th><button type="button" class="sort-button" data-sort="advisor_asc">Advisor <span class="sort-icon"></span></button></th>
              </tr>
            </thead>
            <tbody id="riskRows"></tbody>
          </table>
          <div class="empty" id="emptyState" hidden>No students match the current filters.</div>
        </div>
      </div>

      <aside class="side-stack">
        <div class="panel">
          <div class="panel-header">
            <h2>Selected Student</h2>
            <span id="detailBand"></span>
          </div>
          <div class="detail" id="studentDetail"></div>
        </div>
        <div class="panel">
          <div class="panel-header"><h2>Faculty Snapshot</h2></div>
          <div class="bars" id="facultyBars"></div>
        </div>
        <div class="panel">
          <div class="panel-header"><h2>Validation Metrics</h2></div>
          <table class="metric-table"><tbody id="metricsRows"></tbody></table>
        </div>
        <div class="panel">
          <div class="panel-header"><h2>Charts</h2></div>
          <div class="chart-grid">
            <img data-dashboard-src="risk_distribution.png" alt="Risk score distribution by band">
            <img data-dashboard-src="fairness_flag_rates.png" alt="Fairness flag rate checks">
          </div>
        </div>
      </aside>
    </section>

    <section class="panel" style="margin-top:18px">
      <div class="panel-header">
        <h2>Fairness Checks</h2>
        <div class="hint">Flag rates and error rates are demo-only because Spring 2026 labels are generated for illustration.</div>
      </div>
      <div class="table-wrap" style="max-height:340px">
        <table>
          <thead>
            <tr>
              <th>Group</th>
              <th>Value</th>
              <th>Students</th>
              <th>Label rate</th>
              <th>Avg score</th>
              <th>Flag rate</th>
              <th>Recall</th>
              <th>False positive rate</th>
            </tr>
          </thead>
          <tbody id="fairnessRows"></tbody>
        </table>
      </div>
    </section>

    <div class="footnote">
      This is a synthetic demo. The front end reads embedded generated data from this HTML file and can be opened directly without a server.
      Production would add authentication, row-level access, intervention logging, and approved data governance controls.
    </div>
  </main>

  <script>
    const dashboardAssetBase = location.protocol === "file:" || location.pathname.includes("/outputs/") ? "" : "/outputs/";
    document.querySelectorAll("img[data-dashboard-src]").forEach(img => {
      img.src = `${dashboardAssetBase}${img.dataset.dashboardSrc}`;
    });
    const data = __DASHBOARD_DATA__;
    const state = { band: "all", query: "", faculty: "all", mode: "all", sort: "band", selectedId: null, limit: 250 };
    const bandOrder = { red: 0, amber: 1, green: 2 };

    const textValue = (row, key) => String(row[key] ?? "");
    const numeric = (value, fallback = 0) => {
      const number = Number(value);
      return Number.isFinite(number) ? number : fallback;
    };
    const compareText = (a, b, key) => textValue(a, key).localeCompare(textValue(b, key));
    const defaultTieBreak = (a, b) => compareText(a, b, "canonical_student_id");
    const sorters = {
      band: (a, b) => (bandOrder[a.risk_band] - bandOrder[b.risk_band]) || (numeric(b.risk_score) - numeric(a.risk_score)) || defaultTieBreak(a, b),
      risk_desc: (a, b) => (numeric(b.risk_score) - numeric(a.risk_score)) || defaultTieBreak(a, b),
      risk_asc: (a, b) => (numeric(a.risk_score) - numeric(b.risk_score)) || defaultTieBreak(a, b),
      gpa_asc: (a, b) => (numeric(a.prior_gpa, 999) - numeric(b.prior_gpa, 999)) || (numeric(b.risk_score) - numeric(a.risk_score)),
      missing_desc: (a, b) => (numeric(b.missing_assignments_to_date) - numeric(a.missing_assignments_to_date)) || (numeric(b.risk_score) - numeric(a.risk_score)),
      overdue_desc: (a, b) => (numeric(b.payment_overdue_days) - numeric(a.payment_overdue_days)) || (numeric(b.risk_score) - numeric(a.risk_score)),
      balance_desc: (a, b) => (numeric(b.outstanding_balance_nok) - numeric(a.outstanding_balance_nok)) || (numeric(b.risk_score) - numeric(a.risk_score)),
      lms_asc: (a, b) => (numeric(a.lms_logins_14d, 999999) - numeric(b.lms_logins_14d, 999999)) || (numeric(b.risk_score) - numeric(a.risk_score)),
      faculty_asc: (a, b) => compareText(a, b, "faculty_asof") || compareText(a, b, "programme_asof") || (numeric(b.risk_score) - numeric(a.risk_score)),
      student_asc: (a, b) => compareText(a, b, "canonical_student_id"),
      advisor_asc: (a, b) => compareText(a, b, "advisor_id") || (numeric(b.risk_score) - numeric(a.risk_score)),
    };

    function pct(value, digits = 1) {
      return `${(Number(value) * 100).toFixed(digits)}%`;
    }

    function formatInt(value) {
      return numeric(value).toLocaleString();
    }

    function formatNok(value) {
      return `NOK ${numeric(value).toLocaleString()}`;
    }

    function escapeHtml(value) {
      return String(value ?? "").replace(/[&<>"']/g, char => ({
        "&": "&amp;",
        "<": "&lt;",
        ">": "&gt;",
        '"': "&quot;",
        "'": "&#39;",
      }[char]));
    }

    function renderSummary() {
      document.getElementById("asOf").textContent = data.as_of_date;
      document.getElementById("modelVersion").textContent = data.model_version;
      document.getElementById("generatedAt").textContent = data.generated_at;
      document.getElementById("studentsScored").textContent = data.students_scored.toLocaleString();
      document.getElementById("redCount").textContent = data.band_counts.red.toLocaleString();
      document.getElementById("amberCount").textContent = data.band_counts.amber.toLocaleString();
      const holdout = data.validation.find(row => row.dataset === "2025A_holdout_point_in_time") || data.validation[0];
      document.getElementById("aucValue").textContent = Number(holdout.roc_auc).toFixed(3);
    }

    function populateSelect(id, values) {
      const select = document.getElementById(id);
      const firstOption = select.querySelector("option[value='all']");
      select.replaceChildren(firstOption);
      for (const value of values) {
        const option = document.createElement("option");
        option.value = value;
        option.textContent = value;
        select.appendChild(option);
      }
    }

    function renderFilters() {
      const faculties = [...new Set(data.students.map(row => row.faculty_asof))].sort();
      const modes = [...new Set(data.students.map(row => row.preferred_study_mode))].sort();
      populateSelect("facultyFilter", faculties);
      populateSelect("modeFilter", modes);
    }

    function filteredStudents() {
      const query = state.query.trim().toLowerCase();
      const sorter = sorters[state.sort] || sorters.band;
      return data.students.filter(row => {
        const matchesBand = state.band === "all" || row.risk_band === state.band;
        const matchesFaculty = state.faculty === "all" || row.faculty_asof === state.faculty;
        const matchesMode = state.mode === "all" || row.preferred_study_mode === state.mode;
        const haystack = [
          row.canonical_student_id,
          row.advisor_id,
          row.faculty_asof,
          row.programme_asof,
          row.preferred_study_mode,
          row.academic_standing,
          row.aid_status,
          row.top_reasons,
          row.feature_snapshot_hash,
        ].join(" ").toLowerCase();
        return matchesBand && matchesFaculty && matchesMode && (!query || haystack.includes(query));
      }).sort(sorter);
    }

    function updateSortUi() {
      const select = document.getElementById("sortSelect");
      if ([...select.options].some(option => option.value === state.sort)) {
        select.value = state.sort;
      }
      document.querySelectorAll(".sort-button").forEach(button => {
        const isActive = button.dataset.sort === state.sort || button.dataset.altSort === state.sort;
        button.classList.toggle("active", isActive);
        const icon = button.querySelector(".sort-icon");
        if (icon) {
          icon.textContent = isActive ? (state.sort.endsWith("_asc") ? "^" : "v") : "";
        }
      });
    }

    function renderStudentDetail(row) {
      const detail = document.getElementById("studentDetail");
      const detailBand = document.getElementById("detailBand");
      if (!row) {
        detailBand.innerHTML = "";
        detail.innerHTML = `<div class="empty">No matching student selected.</div>`;
        return;
      }

      detailBand.innerHTML = `<span class="band ${escapeHtml(row.risk_band)}">${escapeHtml(row.risk_band)}</span>`;
      const detailRows = [
        ["Risk score", pct(row.risk_score, 1)],
        ["Prior GPA", numeric(row.prior_gpa).toFixed(2)],
        ["Credits", `${formatInt(row.credits_completed_previous_term)} / ${formatInt(row.credits_registered)}`],
        ["Academic standing", row.academic_standing],
        ["Study mode", row.preferred_study_mode],
        ["Aid status", row.aid_status],
        ["LMS logins, 14d", formatInt(row.lms_logins_14d)],
        ["Days since LMS", formatInt(row.days_since_last_lms_login)],
        ["Course views, 14d", formatInt(row.lms_course_views_14d)],
        ["Video minutes, 14d", formatInt(row.video_minutes_14d)],
        ["Forum posts, 14d", formatInt(row.forum_posts_14d)],
        ["Quiz attempts, 14d", formatInt(row.quiz_attempts_14d)],
        ["Missing assignments", formatInt(row.missing_assignments_to_date)],
        ["Late submissions", formatInt(row.late_submissions_to_date)],
        ["Submission rate", pct(row.submission_completion_rate_to_date, 0)],
        ["Campus days, 14d", formatInt(row.campus_days_14d)],
        ["Campus drop", formatInt(row.campus_drop_14d_vs_start)],
        ["Building entries, 14d", formatInt(row.building_entries_14d)],
        ["Library entries, 30d", formatInt(row.library_entries_30d)],
        ["Outstanding balance", formatNok(row.outstanding_balance_nok)],
        ["Overdue days", formatInt(row.payment_overdue_days)],
      ];

      detail.innerHTML = `
        <div class="detail-head">
          <div>
            <strong>${escapeHtml(row.canonical_student_id)}</strong>
            <div class="detail-meta">${escapeHtml(row.faculty_asof)} / ${escapeHtml(row.programme_asof)}<br>Advisor ${escapeHtml(row.advisor_id)}</div>
          </div>
          <div class="score">${pct(row.risk_score, 1)}</div>
        </div>
        <div class="detail-list">
          ${detailRows.map(([label, value]) => `
            <div class="detail-item">
              <div class="label">${escapeHtml(label)}</div>
              <strong>${escapeHtml(value)}</strong>
            </div>
          `).join("")}
        </div>
        <div class="reason-box">
          <div class="label">Top reasons</div>
          ${escapeHtml(row.top_reasons)}
        </div>
        <div class="reason-box">
          <div class="label">Audit fields</div>
          Prediction ${escapeHtml(row.prediction_id)}<br>
          Snapshot ${escapeHtml(row.feature_snapshot_hash)}
        </div>
      `;
    }

    function renderRiskRows() {
      const tbody = document.getElementById("riskRows");
      const rows = filteredStudents();
      const displayRows = rows.slice(0, state.limit);
      document.getElementById("emptyState").hidden = rows.length > 0;
      document.getElementById("queueCount").textContent = `${rows.length.toLocaleString()} matching student${rows.length === 1 ? "" : "s"}`;
      document.getElementById("queueLimit").textContent = rows.length > state.limit
        ? `Showing first ${state.limit.toLocaleString()}`
        : "All matching rows shown";

      if (!displayRows.length) {
        state.selectedId = null;
        tbody.innerHTML = "";
        renderStudentDetail(null);
        updateSortUi();
        return;
      }

      if (!state.selectedId || !displayRows.some(row => row.canonical_student_id === state.selectedId)) {
        state.selectedId = displayRows[0].canonical_student_id;
      }

      tbody.innerHTML = displayRows.map(row => `
        <tr data-student-id="${escapeHtml(row.canonical_student_id)}" class="${row.canonical_student_id === state.selectedId ? "selected" : ""}">
          <td><span class="band ${escapeHtml(row.risk_band)}">${escapeHtml(row.risk_band)}</span></td>
          <td><strong>${escapeHtml(row.canonical_student_id)}</strong><br><span class="hint">${escapeHtml(row.feature_snapshot_hash)}</span></td>
          <td>${escapeHtml(row.faculty_asof)}<br><span class="hint">${escapeHtml(row.programme_asof)}</span></td>
          <td><span class="score">${pct(row.risk_score, 1)}</span><br><span class="hint">GPA ${numeric(row.prior_gpa).toFixed(2)}</span></td>
          <td>
            <div class="signal-list">
              <span>Missing <strong>${formatInt(row.missing_assignments_to_date)}</strong></span>
              <span>LMS <strong>${formatInt(row.lms_logins_14d)}</strong></span>
              <span>Overdue <strong>${formatInt(row.payment_overdue_days)}</strong></span>
            </div>
          </td>
          <td class="reasons"><div class="reason-preview" title="${escapeHtml(row.top_reasons)}">${escapeHtml(row.top_reasons)}</div></td>
          <td>${escapeHtml(row.advisor_id)}<br><span class="hint">${escapeHtml(row.preferred_study_mode)}</span></td>
        </tr>
      `).join("");

      renderStudentDetail(displayRows.find(row => row.canonical_student_id === state.selectedId));
      updateSortUi();
    }

    function renderFacultyBars() {
      // Render one bar per faculty, scaled against the highest average risk.
      // Text cells are escaped like the other renderers because the values come
      // from the embedded data. The scale is guarded so an all-zero or
      // non-numeric set of averages yields the minimum width, not "NaN%".
      const maxRisk = Math.max(0, ...data.faculty.map(row => numeric(row.avg_risk)));
      document.getElementById("facultyBars").innerHTML = data.faculty.map(row => {
        const share = maxRisk > 0 ? numeric(row.avg_risk) / maxRisk * 100 : 0;
        return `
        <div class="bar-row">
          <div><strong>${escapeHtml(row.faculty_asof)}</strong><br><span class="hint">${escapeHtml(row.students)} students</span></div>
          <div class="bar-track"><div class="bar-fill" style="width:${Math.max(4, share)}%"></div></div>
          <div class="score">${pct(row.avg_risk, 1)}</div>
        </div>
      `;
      }).join("");
    }

    function renderMetrics() {
      const holdout = data.validation.find(row => row.dataset === "2025A_holdout_point_in_time") || data.validation[0];
      const rows = [
        ["ROC-AUC", Number(holdout.roc_auc).toFixed(4)],
        ["Precision at capacity", pct(holdout.precision_at_capacity, 1)],
        ["Recall at capacity", pct(holdout.recall_at_capacity, 1)],
        ["False positive rate", pct(holdout.false_positive_rate_at_capacity, 1)],
        ["Flagged fraction", pct(holdout.flagged_fraction, 1)],
      ];
      document.getElementById("metricsRows").innerHTML = rows.map(([label, value]) => `<tr><td>${label}</td><td><strong>${value}</strong></td></tr>`).join("");
    }

    function renderFairness() {
      // Fill the fairness table from data.fairness. Group, value and student
      // count are data-driven text, so they are escaped before being placed in
      // markup, matching renderRiskRows and renderStudentDetail. A missing
      // group or value now renders as an empty cell instead of "null".
      document.getElementById("fairnessRows").innerHTML = data.fairness.map(row => `
        <tr>
          <td>${escapeHtml(row.group)}</td>
          <td>${escapeHtml(row.value)}</td>
          <td>${escapeHtml(row.students)}</td>
          <td>${pct(row.label_rate_demo_only, 1)}</td>
          <td>${pct(row.avg_risk_score, 1)}</td>
          <td>${pct(row.flag_rate, 1)}</td>
          <td>${pct(row.recall_if_labels_later_confirm, 1)}</td>
          <td>${pct(row.false_positive_rate_if_labels_later_confirm, 1)}</td>
        </tr>
      `).join("");
    }

    function bindEvents() {
      document.getElementById("search").addEventListener("input", event => {
        state.query = event.target.value;
        renderRiskRows();
      });
      document.getElementById("facultyFilter").addEventListener("change", event => {
        state.faculty = event.target.value;
        renderRiskRows();
      });
      document.getElementById("modeFilter").addEventListener("change", event => {
        state.mode = event.target.value;
        renderRiskRows();
      });
      document.getElementById("sortSelect").addEventListener("change", event => {
        state.sort = event.target.value;
        state.selectedId = null;
        renderRiskRows();
      });
      document.querySelectorAll(".tabs button").forEach(button => {
        button.addEventListener("click", () => {
          document.querySelectorAll(".tabs button").forEach(btn => btn.classList.remove("active"));
          button.classList.add("active");
          state.band = button.dataset.band;
          renderRiskRows();
        });
      });
      document.querySelectorAll(".sort-button").forEach(button => {
        button.addEventListener("click", () => {
          state.sort = state.sort === button.dataset.sort && button.dataset.altSort ? button.dataset.altSort : button.dataset.sort;
          state.selectedId = null;
          renderRiskRows();
        });
      });
      document.getElementById("riskRows").addEventListener("click", event => {
        const row = event.target.closest("tr[data-student-id]");
        if (!row) return;
        state.selectedId = row.dataset.studentId;
        renderRiskRows();
      });
      document.getElementById("resetFilters").addEventListener("click", () => {
        state.band = "all";
        state.query = "";
        state.faculty = "all";
        state.mode = "all";
        state.sort = "band";
        state.selectedId = null;
        document.getElementById("search").value = "";
        document.getElementById("facultyFilter").value = "all";
        document.getElementById("modeFilter").value = "all";
        document.querySelectorAll(".tabs button").forEach(button => button.classList.toggle("active", button.dataset.band === "all"));
        renderRiskRows();
      });
    }

    renderSummary();
    renderFilters();
    renderRiskRows();
    renderFacultyBars();
    renderMetrics();
    renderFairness();
    bindEvents();
  </script>
</body>
</html>
"""


def run_demo(out_dir: Path, n_students: int, seed: int, open_dashboard: bool) -> None:
    """Run the demo end to end: generate sources, fit, score, validate, write outputs.

    Three 2025A feature snapshots are stacked as training data, a logistic
    model is fitted on them, and the 2026S snapshot is scored. A second model
    fitted on part of the training rows provides holdout metrics. The results
    are passed to ``write_outputs`` and a short summary is printed.

    Args:
        out_dir: Output folder handed to ``write_outputs``.
        n_students: Number of synthetic students to generate.
        seed: Seed for source generation; ``seed + 101`` seeds the holdout split.
        open_dashboard: When true, try to open the written dashboard in the
            default browser.
    """
    label_col = "dropout_label_next_term"
    # Fraction passed to classification_metrics; presumably the share of
    # students advisors have capacity to act on (the "*_at_capacity" metrics).
    capacity_fraction = 0.12
    # Score quantile used as the flag threshold for the fairness report.
    flag_quantile = 0.88
    # Approximate share of the training cohort held out for validation.
    holdout_fraction = 0.25

    sources = generate_sources(n_students=n_students, seed=seed)

    train_dates = ("2025-09-15", "2025-10-06", "2025-10-27")
    train_snapshots = [build_feature_snapshot(sources, "2025A", pd.Timestamp(day)) for day in train_dates]
    train_all = pd.concat(train_snapshots, ignore_index=True)
    score_features = build_feature_snapshot(sources, "2026S", pd.Timestamp("2026-02-23"))

    numeric_cols = [
        "prior_gpa",
        "credits_registered",
        "credits_completed_previous_term",
        "term_week",
        "lms_logins_14d",
        "lms_course_views_14d",
        "video_minutes_14d",
        "forum_posts_14d",
        "quiz_attempts_14d",
        "days_since_last_lms_login",
        "missing_assignments_to_date",
        "late_submissions_to_date",
        "submission_completion_rate_to_date",
        "outstanding_balance_log1p",
        "payment_overdue_days",
        "financial_overdue_flag",
        "campus_days_14d",
        "building_entries_14d",
        "library_entries_30d",
        "campus_drop_14d_vs_start",
    ]
    categorical_cols = ["faculty_asof", "academic_standing", "preferred_study_mode", "aid_status", "student_status_asof"]

    # Protected and sensitive demographic fields are retained for audit reporting, not default model input.
    # Only the two design matrices are needed here; the remaining return values are discarded.
    x_train, x_score, _, _, _ = prepare_design_matrix(train_all, score_features, numeric_cols, categorical_cols)
    y_train = train_all[label_col].astype(int).to_numpy()
    model = NumpyLogisticRegression().fit(x_train, y_train)

    # Work on a copy so the snapshot returned by build_feature_snapshot is not mutated.
    score_features = score_features.copy()
    score_features["risk_score"] = model.predict_proba(x_score)
    score_features["risk_band"] = assign_bands(score_features["risk_score"])
    score_features["model_version"] = MODEL_VERSION
    score_features["prediction_id"] = [f"PRED-2026S-{i:06d}" for i in range(1, len(score_features) + 1)]
    score_features["top_reasons"] = score_features.apply(lambda row: " | ".join(reason_codes(row)), axis=1)
    hash_cols = numeric_cols + categorical_cols + ["as_of_date", "term_id"]
    score_features["feature_snapshot_hash"] = score_features.apply(lambda row: feature_hash(row, hash_cols), axis=1)

    # Demo-only validation: in a real deployment, the 2026S labels are unknown at scoring time.
    rng = np.random.default_rng(seed + 101)
    if "canonical_student_id" in train_all.columns:
        # train_all stacks several snapshots of the same cohort. Splitting by
        # student keeps every snapshot of a student on one side of the split,
        # so the holdout metrics are not inflated by near-duplicate rows.
        student_ids = train_all["canonical_student_id"].drop_duplicates().to_numpy()
        holdout_ids = student_ids[rng.random(len(student_ids)) < holdout_fraction]
        mask = train_all["canonical_student_id"].isin(holdout_ids).to_numpy()
    else:
        # No student key available: fall back to a row-wise random split.
        mask = rng.random(len(train_all)) < holdout_fraction
    holdout = train_all.loc[mask].copy()
    train_part = train_all.loc[~mask].copy()
    x_train_part, x_holdout, _, _, _ = prepare_design_matrix(train_part, holdout, numeric_cols, categorical_cols)
    holdout_model = NumpyLogisticRegression().fit(x_train_part, train_part[label_col].astype(int).to_numpy())
    holdout_scores = holdout_model.predict_proba(x_holdout)
    holdout_metrics = classification_metrics(holdout[label_col].astype(int).to_numpy(), holdout_scores, capacity_fraction)
    demo_metrics = classification_metrics(
        score_features[label_col].astype(int).to_numpy(), score_features["risk_score"].to_numpy(), capacity_fraction
    )
    validation = pd.DataFrame(
        [
            {"dataset": "2025A_holdout_point_in_time", **{k: round(v, 4) for k, v in holdout_metrics.items()}},
            {"dataset": "2026S_labels_for_demo_only", **{k: round(v, 4) for k, v in demo_metrics.items()}},
        ]
    )

    threshold = float(np.quantile(score_features["risk_score"], flag_quantile))
    fairness = fairness_report(score_features, threshold)
    dashboard_path = write_outputs(sources, score_features, validation, fairness, out_dir)

    red = int(score_features["risk_band"].eq("red").sum())
    amber = int(score_features["risk_band"].eq("amber").sum())
    print(f"Wrote outputs to: {out_dir.resolve()}")
    print(f"Students scored: {len(score_features):,}")
    print(f"Advisor bands: red={red}, amber={amber}, green={len(score_features) - red - amber}")
    print(f"Validation ROC-AUC (holdout): {validation.loc[0, 'roc_auc']}")
    print(f"Advisor list: {(out_dir / 'advisor_risk_list.csv').resolve()}")
    print(f"Dashboard: {dashboard_path.resolve()}")
    if open_dashboard:
        # webbrowser.open reports failure through its return value.
        opened = webbrowser.open(dashboard_path.resolve().as_uri())
        if opened:
            print("Opened dashboard in your default browser.")
        else:
            print("Could not open the browser automatically; open the dashboard path above.")


def main() -> None:
    """Parse command-line options and run the demo.

    Options: ``--out`` (output folder, default ``outputs``), ``--students``
    (default 1200), ``--seed`` (default 42) and ``--no-open-dashboard``.
    Exits with an argparse usage error when ``--students`` is not a positive
    integer, instead of passing an empty cohort to the pipeline.
    """
    parser = argparse.ArgumentParser(description="Generate a dummy Student Outcome Intelligence Platform demo.")
    parser.add_argument("--out", default="outputs", help="Output folder.")
    parser.add_argument("--students", type=int, default=1200, help="Number of dummy students to generate.")
    parser.add_argument("--seed", type=int, default=42, help="Random seed.")
    parser.add_argument(
        "--no-open-dashboard",
        action="store_true",
        help="Generate the HTML dashboard but do not open it in a browser.",
    )
    args = parser.parse_args()
    if args.students < 1:
        # parser.error prints the usage message and exits with status 2.
        parser.error("--students must be a positive integer.")
    run_demo(Path(args.out), n_students=args.students, seed=args.seed, open_dashboard=not args.no_open_dashboard)


# Run the command-line entry point only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
