#!/usr/bin/env python3
"""American Distress Index (ADI) — family-v1 production engine (national level).

The national member of the three-index family (ADI / SDI / CDI). SDI and CDI
rank places against other places at one point in time; ADI ranks the present
quarter against every quarter in the nation's own history, using the same
five-domain taxonomy, the same Hazen percentile transform, the same
orientation rule (every input flagged so higher = more distress), and the
same registry-derived equal-domain weighting.

Formula (each step implemented below, no fitted constants anywhere):
  1. Registry: ten input series under data/indicators/** (the daily-refreshed
     library with direction metadata), each with a domain assignment.
  2. Quarterly aggregation: a series-quarter exists only if at least one raw
     observation is dated inside it; its value is the arithmetic mean of the
     observations present. No interpolation, no carry-forward.
  3. Orientation: where the JSON direction field says lower_is_worse
     (savings_rate only), the quarterly values are negated. After this step
     higher always means more distress. Missing or unexpected direction
     values abort the build (fail closed).
  4. Normalization: one pass per series over its ENTIRE available quarterly
     history — Hazen percentile (average_rank - 0.5) / n * 100, ties
     averaged. One yardstick: no regime split, no baseline window, no
     winsorization, no z-cap.
  5. Domain score: unweighted mean of the percentiles of the domain members
     present at that quarter (at least one required); members_present is
     published per domain per quarter.
  6. Composite: sum over the five domains of domain_weight x domain_score,
     domain_weight = 1 / len(DOMAIN_IDS), computed from the registry.
  7. Publication: a quarter publishes only when all five domains have at
     least one member present.
  8. Scale: the composite is already 0-100 — an average percentile of the
     nation's own history. No further mapping.
  9. Bands: uniform 20-point segments derived as i * 100 / N_BANDS. Band
     labels locked by editorial decision 2026-06-09: Minimal / Low / Typical
     / High / Severe, severity ascending with the band number (the family
     band-direction rule: a bigger band number always means more distress).
     Labels describe position within the nation's own history; they are not
     calibrated to narrative periods. The label always publishes alongside
     the literal reading ("on average, its inputs sit higher than in N% of
     their own quarterly histories") — the composite is a mean of
     percentiles, never quoted as a percentile of quarters; the composite's
     own quarters-rank is emitted separately as rank_in_history. The time
     axis gets adjectives; places (SDI/CDI) get ranks.
 10. Build gates (any failure = no output written): seam, GFC window,
     orientation perturbation, in-process byte-reproducibility, weights
     uniformity, band-label lock, registry dispositions, vintage-honesty
     statement presence.

Run from the repo root:
    PYTHONPATH=. python3 scripts/indexes/compute_adi.py

Reads only committed files under data/indicators/**; writes
data/indexes/adi.json and data/indexes/adi.csv. Deterministic: no network,
no wall-clock timestamps (output last_updated = max of input last_updated
values), stable ordering, fixed rounding. Two runs on the same inputs are
byte-identical (gated here and re-verified across processes by
scripts/indexes/validate_adi.py — run the validator with an interpreter
that has scipy, e.g. the repo venv; the engine itself is stdlib-only).
"""
from __future__ import annotations

import json
import logging
import math
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional, Sequence, Tuple

# Locate the repo root from this file's own location. The run instructions in
# the module docstring place this script at scripts/indexes/compute_adi.py, so
# the directory two levels above THIS_DIR is the repo root.
THIS_DIR = Path(__file__).resolve().parent
REPO_ROOT = THIS_DIR.parents[1]
OUTPUT_DIR = REPO_ROOT / "data" / "indexes"

# Put the repo root on sys.path so the `scripts.*` imports below resolve even
# when the caller did not set PYTHONPATH.
if str(REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(REPO_ROOT))

try:
    from scripts.indexes.family_normalization import hazen_percentiles_series  # noqa: E402
    from scripts.indicators.schema import date_to_quarter  # noqa: E402
except ImportError as exc:  # pragma: no cover
    raise SystemExit(
        "Cannot import scripts.indexes.family_normalization / "
        "scripts.indicators.schema. Run from the repo root with PYTHONPATH=. "
        "— see the module docstring."
    ) from exc

logger = logging.getLogger("adi")

# ---------------------------------------------------------------------------
# Registry — the only place inputs, domains, and paths are declared.
# Orientation is NOT declared here: it is read from each JSON's direction
# field at load time and the build fails closed if the field is absent.
# ---------------------------------------------------------------------------

# Version tag of the shared family method (see the module docstring).
TAXONOMY_VERSION = "family-v1"

# The five domains. Order matters: domain scores and the composite are
# iterated in this order, and the equal domain weight is 1 / len(DOMAIN_IDS).
DOMAIN_IDS: Tuple[str, ...] = (
    "delinquency",
    "default_legal",
    "debt_burden",
    "labor",
    "safety_net_buffer",
)

# Number of equal-width bands the 0-100 composite is divided into.
N_BANDS = 5

# Locked by editorial decision 2026-06-09. Severity ascends with the band
# number (family band-direction rule); labels describe position within the
# nation's own history. The site always pairs the label with the literal
# reading; SDI/CDI never use these labels — places get ranks, not adjectives.
BAND_LABELS: Tuple[str, ...] = ("Minimal", "Low", "Typical", "High", "Severe")

# The only values accepted for an input JSON's direction field; load_inputs
# aborts the build on anything else.
VALID_DIRECTIONS = ("higher_is_worse", "lower_is_worse")


@dataclass(frozen=True)
class InputSpec:
    """One registry member: which series it is, where its JSON lives, and its domain."""

    input_id: str  # key used in DOMAIN_MEMBERS and in the mapping load_inputs returns
    rel_path: str  # path of the committed JSON; load_inputs resolves it against the repo root
    domain: str  # matched against DOMAIN_IDS when DOMAIN_MEMBERS is derived


# The ten index inputs. Tuple order is the iteration order used wherever the
# registry is walked (loading, the orientation gate, output assembly), so
# reordering entries changes the order of emitted rows.
REGISTRY: Tuple[InputSpec, ...] = (
    InputSpec("mortgage_delinquency", "data/indicators/door1_debt/mortgage_delinquency.json", "delinquency"),
    InputSpec("credit_card_delinquency", "data/indicators/door1_debt/credit_card_delinquency.json", "delinquency"),
    InputSpec("consumer_loan_delinquency", "data/indicators/door1_debt/consumer_loan_delinquency.json", "delinquency"),
    # Canonical auto series. data/indicators/door1_debt/auto_delinquency_nyfed.json
    # carries identical dates and values (verified 2026-06-09) under a separate
    # registration in scripts/indicators/config.py; it is dispositioned in
    # registry_exclusions so the domain cannot double-count, and the duplicate
    # registration retires in the index-launch clean-break unit.
    InputSpec("auto_loan_delinquency", "data/indicators/door1_debt/auto_loan_delinquency.json", "delinquency"),
    InputSpec("credit_card_chargeoff", "data/indicators/door1_debt/credit_card_chargeoff.json", "default_legal"),
    # Lender-loss measure: label "mortgage charge-offs", never "foreclosures".
    InputSpec("foreclosure_chargeoff_proxy", "data/indicators/door2_legal/foreclosure_chargeoff_proxy.json", "default_legal"),
    InputSpec("debt_service_ratio", "data/indicators/door3_buffer/debt_service_ratio.json", "debt_burden"),
    InputSpec("unemployment_rate", "data/indicators/door4_income/unemployment_rate.json", "labor"),
    InputSpec("initial_claims", "data/indicators/door4_income/initial_claims.json", "labor"),
    InputSpec("savings_rate", "data/indicators/door3_buffer/savings_rate.json", "safety_net_buffer"),
)

# Derived view of REGISTRY: domain id -> tuple of member input_ids, in registry
# order. Computed rather than typed so membership is declared in one place only.
DOMAIN_MEMBERS: Dict[str, Tuple[str, ...]] = {
    d: tuple(s.input_id for s in REGISTRY if s.domain == d) for d in DOMAIN_IDS
}

# Registry membership criterion (closes cold-review MEDIUM-1, 2026-06-09
# docs/audit/cold_reviews/2026-06-09_index_rebuild_adi.md): a committed series
# joins iff (a) it measures a household-distress rate or share (not a spread,
# a derived difference, or a trend-dominated nominal dollar / employment-count
# total), (b) it has quarterly-resolvable history spanning 2005Q1-present, and
# (c) within its domain it is neither a population subset/superset of an
# existing member, nor a data-identical duplicate, nor level-redundant with
# one, nor distorted by a reporting-regime break. MEMBERSHIP_CRITERION below
# carries the full wording. Every committed family-fitting candidate is
# dispositioned here; adding or removing a member is a versioned registry
# diff, never a silent edit.
MEMBERSHIP_CRITERION = (
    "A committed series joins the registry only if (a) it measures a "
    "household-distress rate or share — not a spread or derived difference, "
    "and not a nominal dollar or employment-count total whose full-history "
    "percentile tracks trend growth rather than household condition; "
    "(b) it has quarterly-resolvable history spanning 2005Q1 to the present; "
    "and (c) within its domain it is neither a population subset or superset "
    "of an existing member, nor a data-identical duplicate, nor "
    "level-redundant with one, nor distorted by a reporting-regime break. "
    "Every committed series under data/indicators/door*/ with a direction "
    "field and that span is either a registry member or carries a written "
    "disposition in registry_exclusions — enforced by the dispositions build "
    "gate, which fails closed when a new candidate appears undispositioned. "
    "Membership changes are versioned registry diffs."
)


@dataclass(frozen=True)
class ExcludedSpec:
    """A written disposition for a candidate series that is not a registry member."""

    candidate_id: str  # compared with the JSON file stems found by enumerate_candidates
    series: str  # human-readable source / series identifier
    reason: str  # why the candidate is excluded


# Measured materiality of the four rankable rate exclusions (recorded in the
# 2026-06-09 cold review): admitting DRCCLOBS moves 2025-Q4 from 44.62 to
# 45.92; admitting all four moves it to 47.51; peak/trough/milestone story
# invariant under every combination. The omissions tilt the current reading
# DOWN, so exclusion is conservative rather than narrative-serving.
# Every disposition fact below is verified against the committed files
# (correlations computed on the committed quarterly series, 106 common
# quarters, 2026-06-09).
EXCLUDED: Tuple[ExcludedSpec, ...] = (
    ExcludedSpec(
        "small_bank_cc_delinquency", "FRED DRCCLOBS",
        "population subset: the member DRCCLACBS covers credit-card "
        "delinquency at ALL commercial banks including the other-banks "
        "subset; admitting both double-weights card delinquency.",
    ),
    ExcludedSpec(
        "continuing_claims", "FRED CCSA",
        "mechanically downstream of the member initial_claims: continuing "
        "claims count prior initial claimants still drawing benefits "
        "(contemporaneous r=0.92 on the committed quarterly sample); the "
        "labor domain already carries the flow member (initial_claims) and "
        "the stock member (unemployment_rate).",
    ),
    ExcludedSpec(
        "u6_underemployment", "FRED U6RATE",
        "definitional superset: U-6 contains the U-3 unemployed count that "
        "the member unemployment_rate measures; admitting both double-counts "
        "every unemployed worker.",
    ),
    ExcludedSpec(
        "mortgage_debt_service", "FRED MDSP",
        "strict component of the member debt_service_ratio (TDSP = mortgage "
        "MDSP + consumer CDSP); admitting both double-counts mortgage debt "
        "service.",
    ),
    ExcludedSpec(
        "bank_delinquency_spread", "derived spread",
        "a derived difference between two bank delinquency rates, not a "
        "household-distress level; fails criterion (a) and has no stable "
        "distress orientation.",
    ),
    ExcludedSpec(
        "auto_delinquency_nyfed", "NY Fed CCP",
        "data-identical duplicate of the member auto_loan_delinquency "
        "(identical dates and values on all 92 quarters, verified "
        "2026-06-09) under a separate registration in "
        "scripts/indicators/config.py; the duplicate registration retires "
        "in the index-launch clean-break unit.",
    ),
    ExcludedSpec(
        "serious_delinquency_rate", "NY Fed CCP",
        "population superset: the all-product 90+ aggregate contains the "
        "product-level delinquency the domain's members measure "
        "individually; admitting both double-counts every delinquent "
        "balance.",
    ),
    ExcludedSpec(
        "total_delinquency_rate", "NY Fed CCP",
        "population superset: the all-product 30+ aggregate contains the "
        "product-level delinquency the domain's members measure "
        "individually; same class as serious_delinquency_rate.",
    ),
    ExcludedSpec(
        "student_loan_delinquency", "NY Fed CCP",
        "reporting-regime break: the federal payment moratorium held "
        "reported delinquency near zero by administrative action and the "
        "2025 resumption of credit-bureau reporting restored it as a step, "
        "so the series measures reporting policy as much as household "
        "behavior across the sample; excluded until a regime-consistent "
        "treatment exists (a future versioned registry diff).",
    ),
    ExcludedSpec(
        "parttime_economic_reasons", "BLS LNS12032194",
        "level-redundant with the member unemployment_rate (r=0.90 on the "
        "committed quarterly sample); the labor domain carries one stock "
        "and one flow member by design.",
    ),
    ExcludedSpec(
        "jolts_quits", "BLS JTSQUR",
        "measures labor-market churn and worker confidence, not a "
        "household-distress rate; fails criterion (a).",
    ),
    ExcludedSpec(
        "nonfarm_payrolls", "FRED PAYEMS",
        "a trend-dominated employment count that scales with population; "
        "its full-history percentile tracks growth, not household "
        "condition; fails criterion (a).",
    ),
    ExcludedSpec(
        "atlanta_fed_wage_q1", "Atlanta Fed wage tracker",
        "a wage-growth rate, not a household-distress rate; fails "
        "criterion (a).",
    ),
    ExcludedSpec(
        "wage_inflation_gap", "Bank of America Institute",
        "a derived spread (wage growth minus inflation), not a "
        "household-distress level; fails criterion (a), same class as "
        "bank_delinquency_spread.",
    ),
    ExcludedSpec(
        "bankruptcy_chargeoff_proxy", "FRED CORALACBN",
        "population superset and scope mismatch: charge-offs on ALL loans "
        "contain the member card and mortgage charge-offs and add "
        "commercial lending, which is outside household distress.",
    ),
    ExcludedSpec(
        "consumer_credit_outstanding", "FRED TOTALSL",
        "nominal dollar total: its full-history percentile tracks trend "
        "growth in population, income, and prices, not household "
        "condition; fails criterion (a).",
    ),
    ExcludedSpec(
        "credit_card_debt_total", "NY Fed CCP",
        "nominal dollar total; same class as consumer_credit_outstanding.",
    ),
    ExcludedSpec(
        "heloc_balance", "NY Fed CCP",
        "nominal dollar total; same class as consumer_credit_outstanding.",
    ),
    ExcludedSpec(
        "household_debt_total", "NY Fed CCP",
        "nominal dollar total; same class as consumer_credit_outstanding.",
    ),
    ExcludedSpec(
        "revolving_credit", "FRED REVOLSL",
        "nominal dollar total; same class as consumer_credit_outstanding.",
    ),
    ExcludedSpec(
        "mortgage_origination_volume", "NY Fed CCP",
        "nominal flow volume driven by the rate cycle and trend growth; "
        "low origination can mean tight credit or weak demand, so it has "
        "no stable distress orientation; fails criterion (a).",
    ),
)

VINTAGE_HONESTY = (
    "Inputs are revised series (Federal Reserve Board bank-condition rates via "
    "FRED, NY Fed Consumer Credit Panel, BLS, DOL, and BEA series via FRED). "
    "Historical ADI values are computed on today's revised vintages, not on the "
    "data as first published. Every run restates the full history: the percentile "
    "yardstick grows by one quarter per refresh, trailing partial quarters fill "
    "in as observations land, and upstream revisions re-rank past quarters. No "
    "out-of-sample claim is made for any historical reading, including the "
    "2008-2010 stretch."
)

FAMILY_COVERAGE_NOTES = {
    "default_legal": (
        "Carries the two household charge-off series only. The legal leg "
        "(bankruptcy filings) is absent nationally because committed US Courts "
        "quarterly coverage has a 2015Q2-2023Q2 hole; after the F-2 quarterly "
        "backfill it joins as an explicit registry diff. The mortgage leg "
        "(CORSFRMACBS) measures lender-loss foreclosures and reads near its "
        "floor in high-home-equity cycles while foreclosure filings rise, so "
        "terminal housing distress is understated; label it 'mortgage "
        "charge-offs', never 'foreclosures'."
    ),
    "safety_net_buffer": (
        "Carries the private-buffer member only (personal saving rate, "
        "inverted). Committed public-assistance data lacks national history "
        "spanning 2005 forward (the committed SNAP file starts 2019-10); a "
        "long national assistance series joins later as a registry diff."
    ),
    "debt_burden": (
        "Single member (household debt service ratio, FRED TDSP). The committed "
        "file starts 2005Q1, which binds the index start and the yardstick "
        "span. FRED publishes TDSP back to 1980; extending the pull to the "
        "full history is a planned versioned registry diff before site "
        "cutover (method unchanged)."
    ),
}


# ---------------------------------------------------------------------------
# Loading and aggregation
# ---------------------------------------------------------------------------


@dataclass(frozen=True)
class LoadedInput:
    """A registry member after loading: JSON metadata plus its quarterly means.

    Field notes below describe how load_inputs populates each field.
    """

    spec: InputSpec  # the registry entry this input was loaded from
    direction: str  # the JSON direction field, validated against VALID_DIRECTIONS
    last_updated: str  # the JSON last_updated, stringified ("" when absent)
    source: str  # the JSON source, stringified ("" when absent)
    fred_series_id: Optional[str]  # passed through from the JSON; None when absent
    name: str  # the JSON name, falling back to the input_id
    unit: str  # the JSON unit, stringified ("" when absent)
    quarterly_raw: Dict[str, float]  # quarter key -> mean of in-quarter observations


@dataclass(frozen=True)
class Perturbation:
    """A raw-scale perturbation for the orientation gate.

    mode 'unit'    : one raw unit in the distress direction per the JSON
                     direction field (+1 for higher_is_worse, -1 for
                     lower_is_worse).
    mode 'extreme' : the raw value is set one unit beyond the series'
                     worst-ever raw value in the distress direction, which
                     must move the quarter to (or keep it at) the top rank.
    The sign comes from the direction field read out of the JSON — not from
    the pipeline's own orientation step — so a wiring error in orientation
    consumption makes this gate fail.
    """

    input_id: str  # the single input whose raw quarterly value is perturbed
    quarter: str  # quarter key to perturb; build() raises if the input lacks it
    mode: str  # "unit" | "extreme"


def quarter_index(q: str) -> int:
    """Map a 'YYYY-Qn' key to a running quarter count (year * 4 + n - 1).

    Consecutive quarters differ by exactly 1, so callers can test adjacency
    with simple subtraction.
    """
    year_part, quarter_part = q.split("-Q")
    quarters_before_year = 4 * int(year_part)
    return quarters_before_year + (int(quarter_part) - 1)


def load_inputs(repo_root: Path) -> Dict[str, LoadedInput]:
    """Load every registry member and aggregate its observations to quarters.

    For each InputSpec in REGISTRY the committed JSON is read, its direction
    field is validated, and observations are grouped by the quarter that
    date_to_quarter assigns to their date. A quarter's value is the
    arithmetic mean of the observations present in it; observations whose
    value is None are skipped, and nothing is interpolated or carried forward.

    Args:
        repo_root: directory each spec's rel_path is resolved against.

    Returns:
        Mapping of input_id to LoadedInput, in REGISTRY order.

    Raises:
        SystemExit: fail closed when a direction field is missing or
            unexpected, when an observation value is not a finite number, or
            when a series has no usable observations.

    Other malformed-input errors (missing file, missing keys, non-numeric
    values) propagate unchanged.
    """
    loaded: Dict[str, LoadedInput] = {}
    for spec in REGISTRY:
        # Decode explicitly as UTF-8: without an encoding, read_text() uses the
        # locale's preferred encoding, which would make parsing (and therefore
        # the "byte-identical" output) depend on the machine running the build.
        raw = json.loads((repo_root / spec.rel_path).read_text(encoding="utf-8"))
        direction = raw.get("direction")
        if direction not in VALID_DIRECTIONS:
            raise SystemExit(
                f"FAIL CLOSED: {spec.input_id} direction field is {direction!r}; "
                f"expected one of {VALID_DIRECTIONS}. Refusing to build."
            )
        by_quarter: Dict[str, List[float]] = {}
        for obs in raw["data"]:
            value = obs.get("value")
            if value is None:
                continue
            number = float(value)
            # json.loads accepts NaN / Infinity literals. A non-finite value
            # would poison the quarterly mean and make percentile ranks
            # meaningless, so refuse to build rather than rank garbage.
            if not math.isfinite(number):
                raise SystemExit(
                    f"FAIL CLOSED: {spec.input_id} has a non-finite value {value!r} "
                    f"dated {obs.get('date')!r}. Refusing to build."
                )
            by_quarter.setdefault(date_to_quarter(obs["date"]), []).append(number)
        if not by_quarter:
            raise SystemExit(f"FAIL CLOSED: {spec.input_id} has zero usable observations.")
        loaded[spec.input_id] = LoadedInput(
            spec=spec,
            direction=direction,
            last_updated=str(raw.get("last_updated", "")),
            source=str(raw.get("source", "")),
            fred_series_id=raw.get("fred_series_id"),
            name=str(raw.get("name", spec.input_id)),
            unit=str(raw.get("unit", "")),
            quarterly_raw={q: sum(vs) / len(vs) for q, vs in by_quarter.items()},
        )
    return loaded


# ---------------------------------------------------------------------------
# Build: orientation -> percentiles -> domain scores -> composite
# ---------------------------------------------------------------------------


@dataclass
class BuildResult:
    """Everything one build() pass produces.

    composite, domain_scores and members_present are keyed only by published
    quarters; member_pct and oriented cover each input's full quarterly
    history, including quarters that did not publish.
    """

    published_quarters: List[str]
    composite: Dict[str, float]                  # quarter -> composite (full precision)
    domain_scores: Dict[str, Dict[str, float]]   # quarter -> domain -> score
    members_present: Dict[str, Dict[str, int]]   # quarter -> domain -> count
    member_pct: Dict[str, Dict[str, float]]      # input_id -> quarter -> percentile
    oriented: Dict[str, Dict[str, float]]        # input_id -> quarter -> oriented value


def domain_weight() -> float:
    """Return the equal per-domain weight, derived from the number of domains."""
    n_domains = len(DOMAIN_IDS)
    return 1.0 / n_domains


def member_weight(domain: str) -> float:
    """Return the equal within-domain weight for a member of ``domain``."""
    members = DOMAIN_MEMBERS[domain]
    return 1.0 / len(members)


def build(loaded: Dict[str, LoadedInput], perturb: Optional[Perturbation] = None) -> BuildResult:
    """Run orientation, percentile normalization, domain scoring and publication.

    Args:
        loaded: input_id -> LoadedInput carrying raw quarterly means.
        perturb: optional raw-scale perturbation applied to one input at one
            quarter before orientation (used by the orientation gate).

    Returns:
        A BuildResult. A quarter publishes only when every domain has at
        least one member present at that quarter.

    Raises:
        ValueError: when the perturbation names a quarter the input lacks, or
            an unknown perturbation mode.
    """
    # Step 3 (with the optional perturbation applied first, on the raw scale).
    oriented: Dict[str, Dict[str, float]] = {}
    for input_id, li in loaded.items():
        raw_by_quarter = dict(li.quarterly_raw)  # copy; never mutate the loaded input
        if perturb is not None and perturb.input_id == input_id:
            if perturb.quarter not in raw_by_quarter:
                raise ValueError(f"perturbation quarter {perturb.quarter} absent from {input_id}")
            distress_is_up = li.direction == "higher_is_worse"
            if perturb.mode == "unit":
                step = 1.0 if distress_is_up else -1.0
                raw_by_quarter[perturb.quarter] = raw_by_quarter[perturb.quarter] + step
            elif perturb.mode == "extreme":
                if distress_is_up:
                    raw_by_quarter[perturb.quarter] = max(raw_by_quarter.values()) + 1.0
                else:
                    raw_by_quarter[perturb.quarter] = min(raw_by_quarter.values()) - 1.0
            else:  # pragma: no cover
                raise ValueError(f"unknown perturbation mode {perturb.mode!r}")
        # After this flip, higher always means more distress.
        orientation = -1.0 if li.direction == "lower_is_worse" else 1.0
        oriented[input_id] = {
            quarter: orientation * value for quarter, value in raw_by_quarter.items()
        }

    # Step 4: a single Hazen pass per series over its whole quarterly history.
    member_pct: Dict[str, Dict[str, float]] = {}
    for input_id, series in oriented.items():
        ordered_quarters = sorted(series)  # 'YYYY-Qn' sorts chronologically
        ordered_values = [series[q] for q in ordered_quarters]
        member_pct[input_id] = dict(zip(ordered_quarters, hazen_percentiles_series(ordered_values)))

    # Steps 5-7: domain means, equal-weight composite, publication rule.
    quarter_universe = set()
    for series in member_pct.values():
        quarter_universe.update(series)
    w_domain = domain_weight()
    n_domains = len(DOMAIN_IDS)

    published: List[str] = []
    composite: Dict[str, float] = {}
    domain_scores: Dict[str, Dict[str, float]] = {}
    members_present: Dict[str, Dict[str, int]] = {}
    for q in sorted(quarter_universe):
        scores: Dict[str, float] = {}
        counts: Dict[str, int] = {}
        for d in DOMAIN_IDS:
            pcts_here = [member_pct[m][q] for m in DOMAIN_MEMBERS[d] if q in member_pct[m]]
            if not pcts_here:
                continue
            scores[d] = sum(pcts_here) / len(pcts_here)
            counts[d] = len(pcts_here)
        if len(scores) != n_domains:
            continue  # at least one domain has no member this quarter
        published.append(q)
        composite[q] = sum(w_domain * scores[d] for d in DOMAIN_IDS)
        domain_scores[q] = scores
        members_present[q] = counts
    return BuildResult(published, composite, domain_scores, members_present, member_pct, oriented)


# ---------------------------------------------------------------------------
# Gates — any failure means no output is written
# ---------------------------------------------------------------------------


def quantile_linear(values: Sequence[float], p: float) -> float:
    """Linearly interpolated quantile (the numpy default method), implemented
    here so the threshold derivation is visible in this file.

    Args:
        values: one or more numbers, in any order.
        p: probability in [0, 1]; 0 gives the minimum and 1 the maximum.

    Returns:
        The value at fractional position (n - 1) * p of the sorted values,
        interpolated linearly between the two neighbouring order statistics.

    Raises:
        ValueError: when ``values`` is empty or ``p`` lies outside [0, 1]
            (including NaN). Without this check an empty input surfaced as an
            opaque IndexError, and an out-of-range ``p`` silently returned a
            wrong order statistic through negative indexing.
    """
    if not 0.0 <= p <= 1.0:
        raise ValueError(f"quantile probability must be within [0, 1], got {p!r}")
    s = sorted(values)
    n = len(s)
    if n == 0:
        raise ValueError("quantile_linear requires at least one value")
    if n == 1:
        return s[0]
    h = (n - 1) * p
    lo = math.floor(h)
    # p == 1 lands exactly on the last element; there is no right neighbour
    # to interpolate towards.
    if lo + 1 >= n:
        return s[-1]
    return s[lo] + (s[lo + 1] - s[lo]) * (h - lo)


def gate_weights() -> Dict[str, object]:
    """Weights-uniformity gate.

    Checks that the domain weights sum to 1, that the member weights inside
    each domain sum to 1, and that domain_weight x member_weight over every
    (domain, member) cell totals 1, all within 1e-12.
    """
    tolerance = 1e-12
    w_d = domain_weight()
    member_ws = {d: member_weight(d) for d in DOMAIN_IDS}

    checks = [abs(w_d * len(DOMAIN_IDS) - 1.0) < tolerance]
    checks.extend(
        abs(member_ws[d] * len(DOMAIN_MEMBERS[d]) - 1.0) < tolerance for d in DOMAIN_IDS
    )
    # Each domain contributes len(members) * w_m * w_d = w_d, so the grand
    # total over all cells is the sum of the domain weights, i.e. 1.
    total = sum(domain_weight() * member_weight(d) * len(DOMAIN_MEMBERS[d]) for d in DOMAIN_IDS)
    checks.append(abs(total - 1.0) < tolerance)

    return {
        "pass": all(checks),
        "domain_weight": w_d,
        "member_weights": member_ws,
        "total_weight": total,
        "rule": "domain_weight = 1/len(DOMAIN_IDS); member_weight = 1/len(domain members); zero hand-typed weight literals",
    }


def gate_seam(result: BuildResult) -> Dict[str, object]:
    """Seam gate: the 2014-Q4 -> 2015-Q1 step must not be an outlier move.

    The absolute composite change across that step is compared with the 95th
    percentile of all absolute changes between adjacent published quarters.

    Args:
        result: the baseline build.

    Returns:
        A gate report with "pass" plus the measured values. If either seam
        quarter is unpublished, or no adjacent pair of published quarters
        exists, the gate reports a failure with a "reason" instead of raising
        KeyError / IndexError, so the caller sees a diagnosable failed gate.
    """
    rule = "|ADI(2015-Q1) - ADI(2014-Q4)| must not exceed the 95th percentile of all quarter-over-quarter |delta|"
    qs = result.published_quarters
    # Only truly adjacent quarters contribute: a gap in the published range
    # must not be counted as one quarter-over-quarter move.
    deltas = [
        abs(result.composite[b] - result.composite[a])
        for a, b in zip(qs, qs[1:])
        if quarter_index(b) - quarter_index(a) == 1
    ]
    missing = [q for q in ("2014-Q4", "2015-Q1") if q not in result.composite]
    if missing or not deltas:
        if missing:
            reason = "seam quarter(s) not published: " + ", ".join(missing)
        else:
            reason = "no adjacent published quarters to derive the threshold from"
        return {
            "pass": False,
            "seam_abs_delta_2014Q4_to_2015Q1": None,
            "threshold_p95_of_all_abs_deltas": round(quantile_linear(deltas, 0.95), 4) if deltas else None,
            "median_abs_delta": round(quantile_linear(deltas, 0.5), 4) if deltas else None,
            "n_deltas": len(deltas),
            "rule": rule,
            "reason": reason,
        }
    threshold = quantile_linear(deltas, 0.95)
    seam = abs(result.composite["2015-Q1"] - result.composite["2014-Q4"])
    return {
        "pass": seam <= threshold,
        "seam_abs_delta_2014Q4_to_2015Q1": round(seam, 4),
        "threshold_p95_of_all_abs_deltas": round(threshold, 4),
        "median_abs_delta": round(quantile_linear(deltas, 0.5), 4),
        "n_deltas": len(deltas),
        "rule": rule,
    }


def gate_gfc(result: BuildResult) -> Dict[str, object]:
    """GFC-window gate: the worst 8-quarter rolling mean must start in 2008-2010.

    Args:
        result: the baseline build.

    Returns:
        A gate report with "pass", the worst window and its mean, and the
        twelve highest-composite quarters. The gate also fails when the
        published range is not contiguous. With fewer than eight published
        quarters no window exists; the gate then reports a failure with a
        "reason" rather than raising TypeError on an unset window start.
    """
    rule = "the worst 8-quarter rolling mean must begin within 2008-2010"
    qs = result.published_quarters
    # Published range must be contiguous for rolling windows to be meaningful.
    contiguous = all(quarter_index(b) - quarter_index(a) == 1 for a, b in zip(qs, qs[1:]))
    window = 8
    worst_quarters = sorted(qs, key=lambda q: result.composite[q], reverse=True)[:12]
    if len(qs) < window:
        return {
            "pass": False,
            "published_range_contiguous": contiguous,
            "worst_8q_window": None,
            "worst_8q_window_mean": None,
            "worst_12_quarters": worst_quarters,
            "rule": rule,
            "reason": f"only {len(qs)} published quarters; at least {window} are needed for one rolling window",
        }
    window_means = [
        sum(result.composite[q] for q in qs[i : i + window]) / window
        for i in range(len(qs) - window + 1)
    ]
    best_mean = max(window_means)
    # index() returns the first occurrence, so ties resolve to the earliest window.
    best_start = window_means.index(best_mean)
    start_q = qs[best_start]
    end_q = qs[best_start + window - 1]
    in_gfc = quarter_index("2008-Q1") <= quarter_index(start_q) <= quarter_index("2010-Q4")
    return {
        "pass": contiguous and in_gfc,
        "published_range_contiguous": contiguous,
        "worst_8q_window": f"{start_q} to {end_q}",
        "worst_8q_window_mean": round(best_mean, 2),
        "worst_12_quarters": worst_quarters,
        "rule": rule,
    }


def gate_orientation(loaded: Dict[str, LoadedInput], baseline: BuildResult) -> Dict[str, object]:
    """Orientation gate: worsening any one input must never lower the composite.

    For every registry member, its latest published quarter is perturbed in
    the distress direction on the raw scale ('unit' and 'extreme' modes), the
    whole index is rebuilt, and the composite at that quarter is compared
    with the baseline.

    Args:
        loaded: the loaded inputs the baseline was built from.
        baseline: the unperturbed build.

    Returns:
        A gate report with "pass", one row per input, and the rule text.
    """
    tolerance = 1e-9
    rows = []
    all_pass = True
    for spec in REGISTRY:
        input_id = spec.input_id
        published_here = [
            q for q in baseline.published_quarters if q in loaded[input_id].quarterly_raw
        ]
        q_star = published_here[-1]  # latest published quarter carrying this input
        base_c = baseline.composite[q_star]
        per_input = {
            "input_id": input_id,
            "direction": loaded[input_id].direction,
            "quarter": q_star,
            "baseline_composite": round(base_c, 4),
        }
        for mode in ("unit", "extreme"):
            pert_c = build(loaded, Perturbation(input_id, q_star, mode)).composite[q_star]
            did_not_fall = pert_c >= base_c - tolerance
            if mode == "unit":
                ok = did_not_fall
            else:
                # Strict increase expected unless the quarter already held the
                # series' unique top rank before the perturbation.
                series = baseline.oriented[input_id]
                top = max(series.values())
                n_at_top = sum(1 for v in series.values() if v == top)
                already_unique_top = series[q_star] == top and n_at_top == 1
                if already_unique_top:
                    ok = did_not_fall
                else:
                    ok = pert_c > base_c + tolerance
            per_input[f"{mode}_composite"] = round(pert_c, 4)
            per_input[f"{mode}_pass"] = ok
            all_pass = all_pass and ok
        rows.append(per_input)
    rule = (
        "for every input, perturbing its latest published quarter in the distress "
        "direction (sign read from the JSON direction field, applied on the raw "
        "scale) must not decrease the composite at that quarter; the to-worst-ever "
        "perturbation must strictly increase it unless the quarter already held the "
        "series' unique top rank"
    )
    return {"pass": all_pass, "per_input": rows, "rule": rule}


# ---------------------------------------------------------------------------
# Output assembly
# ---------------------------------------------------------------------------


def band_for(composite_value: float) -> Tuple[int, str, str]:
    """Return (band number, 'low-high' range text, label) for a composite value.

    Bands are N_BANDS equal-width segments of 0-100, numbered from 1; values
    that would land beyond the last band are clamped into it.
    """
    width = 100.0 / N_BANDS
    unclamped = int(composite_value // width) + 1
    band = unclamped if unclamped < N_BANDS else N_BANDS
    lower_edge = round((band - 1) * width)
    upper_edge = round(band * width)
    return band, f"{lower_edge}-{upper_edge}", BAND_LABELS[band - 1]


def enumerate_candidates(repo_root: Path) -> List[str]:
    """Every committed series the membership criterion reaches: any JSON under
    data/indicators/door*/ with a valid direction field and quarterly history
    spanning 2005-Q1 through at least 2024-Q4. Lexicographic comparison is
    chronological for fixed-width 'YYYY-Qn' keys.

    Args:
        repo_root: directory the data/indicators/door*/ glob is rooted at.

    Returns:
        File stems of the qualifying JSON files, in sorted path order.

    Files that cannot be read, are not valid UTF-8, are not valid JSON, or
    whose top level is not a JSON object are skipped: they cannot carry a
    direction field, so they are not candidates.
    """
    candidates: List[str] = []
    for path in sorted(repo_root.glob("data/indicators/door*/*.json")):
        try:
            # Explicit UTF-8: the default follows the locale, which would make
            # the candidate universe depend on the machine running the gate.
            raw = json.loads(path.read_text(encoding="utf-8"))
        except (OSError, UnicodeDecodeError, json.JSONDecodeError):
            continue
        # A top-level list or scalar has no .get(); treat it like any other
        # file that is not an indicator series instead of crashing the scan.
        if not isinstance(raw, dict):
            continue
        if raw.get("direction") not in VALID_DIRECTIONS:
            continue
        quarters = sorted({
            date_to_quarter(obs["date"])
            for obs in raw.get("data", [])
            if obs.get("value") is not None and obs.get("date")
        })
        if quarters and quarters[0] <= "2005-Q1" and quarters[-1] >= "2024-Q4":
            candidates.append(path.stem)
    return candidates


def gate_dispositions(repo_root: Path) -> Dict[str, object]:
    """Dispositions gate: every candidate series must be accounted for.

    Fails when a committed candidate is neither a registry member nor listed
    in EXCLUDED, or when an id appears in both. Exclusions that no longer
    match a candidate are reported but do not fail the gate.
    """
    universe = set(enumerate_candidates(repo_root))
    member_ids = {spec.input_id for spec in REGISTRY}
    excluded_ids = {entry.candidate_id for entry in EXCLUDED}

    undispositioned = sorted(universe - member_ids - excluded_ids)
    overlap = sorted(member_ids & excluded_ids)
    stale_exclusions = sorted(excluded_ids - universe)

    rule = (
        "every committed data/indicators/door*/ series with a direction "
        "field and quarterly history spanning 2005Q1-2024Q4+ is a registry "
        "member or carries a written disposition; an undispositioned "
        "candidate or a member-exclusion overlap fails the build"
    )
    return {
        "pass": not (undispositioned or overlap),
        "n_candidates": len(universe),
        "n_members": len(member_ids & universe),
        "n_excluded": len(excluded_ids & universe),
        "undispositioned": undispositioned,
        "member_and_excluded_overlap": overlap,
        "exclusions_not_currently_in_universe": stale_exclusions,
        "rule": rule,
    }


def gate_labels() -> Dict[str, object]:
    """Band-label lock gate.

    Passes only when BAND_LABELS has exactly N_BANDS entries, equals the
    locked label tuple, and band_for maps the bottom of the scale to band 1
    ("Minimal") and the top to band N_BANDS ("Severe").
    """
    locked = ("Minimal", "Low", "Typical", "High", "Severe")
    ok = len(BAND_LABELS) == N_BANDS and BAND_LABELS == locked
    if ok:
        # Only probe band_for once the label tuple itself is known to be sound.
        bottom_band, _, bottom_label = band_for(0.0)
        top_band, _, top_label = band_for(99.99)
        ok = (bottom_band, top_band, bottom_label, top_label) == (1, N_BANDS, "Minimal", "Severe")
    rule = (
        "exactly N_BANDS labels, locked to the 2026-06-09 editorial decision, "
        "severity ascending with the band number (family band-direction rule)"
    )
    return {"pass": ok, "labels": list(BAND_LABELS), "rule": rule}


def assemble_output(loaded: Dict[str, LoadedInput], result: BuildResult,
                    gates: Dict[str, Dict[str, object]]) -> Dict[str, object]:
    """Assemble the published ADI document from a finished build.

    Args:
        loaded: loaded inputs, looked up by ``spec.input_id`` for every
            ``REGISTRY`` entry.
        result: build result; this function reads ``published_quarters``,
            ``composite``, ``domain_scores``, ``members_present`` and
            ``member_pct`` from it.
        gates: gate records. ``gates["weights"]["rule"]`` is copied into the
            methodology block, and the dict itself is embedded (by
            reference, not copied) under ``"validation"``.

    Returns:
        A dict with identity fields, ``summary``, ``methodology``,
        ``validation`` and ``data`` (one row per published quarter).

    Note:
        Assumes at least one loaded input and at least one published
        quarter: ``max``/``min`` raise ``ValueError`` on an empty sequence
        and the ``[0]``/``[-1]`` lookups raise ``IndexError``.
    """
    # Document-level stamp: the greatest last_updated across all inputs.
    # NOTE(review): ordering depends on the type of last_updated; presumably
    # ISO date strings, which sort chronologically — confirm in LoadedInput.
    last_updated = max(li.last_updated for li in loaded.values())
    # Interior band boundaries only (N_BANDS - 1 of them), rounded to ints.
    band_thresholds = [round(i * 100.0 / N_BANDS) for i in range(1, N_BANDS)]
    # One metadata entry per registry member, in registry order.
    registry_block = []
    for spec in REGISTRY:
        li = loaded[spec.input_id]
        # presumably quarterly_raw is keyed by quarter label, so sorting it
        # yields the quarter labels in order — verify against LoadedInput
        qs = sorted(li.quarterly_raw)
        registry_block.append({
            "input_id": spec.input_id,
            "name": li.name,
            "path": spec.rel_path,
            "domain": spec.domain,
            "direction": li.direction,
            "fred_series_id": li.fred_series_id,
            "source": li.source,
            "unit": li.unit,
            "n_quarters": len(qs),
            "first_quarter": qs[0],
            "last_quarter": qs[-1],
            "last_updated": li.last_updated,
            "member_weight_within_domain": round(member_weight(spec.domain), 6),
        })
    # One row per published quarter, in published_quarters order.
    rows = []
    for q in result.published_quarters:
        # The band is derived from the unrounded composite; the published
        # composite below is rounded to two decimals.
        band, band_range, band_label = band_for(result.composite[q])
        rows.append({
            "quarter": q,
            "composite": round(result.composite[q], 2),
            "band": band,
            "band_range": band_range,
            "band_label": band_label,
            "domains": {
                d: {
                    "score": round(result.domain_scores[q][d], 2),
                    "members_present": result.members_present[q][d],
                }
                for d in DOMAIN_IDS
            },
            # Only members that have a percentile at this quarter appear.
            "members": {
                spec.input_id: round(result.member_pct[spec.input_id][q], 2)
                for spec in REGISTRY
                if q in result.member_pct[spec.input_id]
            },
        })
    # max/min return the first qualifying element, so ties resolve to the
    # quarter that appears first in published_quarters.
    peak_q = max(result.published_quarters, key=lambda q: result.composite[q])
    trough_q = min(result.published_quarters, key=lambda q: result.composite[q])
    # assumes published_quarters is in chronological order — TODO confirm
    latest_q = result.published_quarters[-1]
    # Text before the first "-" of the first published quarter label.
    first_year = result.published_quarters[0].split("-")[0]
    # Rank of the latest composite within the published composite series.
    # presumably hazen_percentiles_series returns one percentile per input
    # value in input order, so [-1] belongs to latest_q — verify
    latest_rank_pct = hazen_percentiles_series(
        [result.composite[q] for q in result.published_quarters]
    )[-1]
    return {
        "series_id": "adi",
        "name": "American Distress Index (ADI)",
        "taxonomy_version": TAXONOMY_VERSION,
        "level": "national",
        "frequency": "quarterly",
        "source": "American Default Research, computed from Federal Reserve Board, NY Fed CCP, BLS, DOL, and BEA series (attribution per input in methodology.registry)",
        "last_updated": last_updated,
        "summary": {
            "n_published_quarters": len(result.published_quarters),
            "first_quarter": result.published_quarters[0],
            "last_quarter": latest_q,
            "peak": {"quarter": peak_q, "composite": round(result.composite[peak_q], 2)},
            "trough": {"quarter": trough_q, "composite": round(result.composite[trough_q], 2)},
            "latest": {
                "quarter": latest_q,
                "composite": round(result.composite[latest_q], 2),
                "band": band_for(result.composite[latest_q])[0],
                "band_label": band_for(result.composite[latest_q])[2],
                # Literal reading of the composite, rounded to a whole percent.
                "reading": (
                    f"On average, its inputs sit higher than in "
                    f"{round(result.composite[latest_q])}% of their own quarterly "
                    f"histories since {first_year}"
                ),
                # The composite's own rank among published quarters, kept
                # separate from the composite value itself.
                "rank_in_history": {
                    "hazen_percentile": round(latest_rank_pct, 1),
                    "reading": (
                        f"The composite itself sits higher than "
                        f"{round(latest_rank_pct)}% of all published quarters "
                        f"since {first_year}"
                    ),
                    "rule": (
                        "Hazen percentile of the latest composite within the "
                        "published composite series; distinct from the composite "
                        "value, which is a mean of input percentiles, not itself "
                        "a percentile of quarters"
                    ),
                },
                "domains": {d: round(result.domain_scores[latest_q][d], 2) for d in DOMAIN_IDS},
            },
        },
        # Static methodology text plus values read from module-level
        # constants and helpers (weights, registry, exclusions).
        "methodology": {
            "reading": (
                "ADI(t) is the mean of five domain scores, each the mean of its members' "
                "Hazen percentiles within their own full quarterly history. A composite of "
                "62 means the index's inputs, on average, sit higher than in 62 percent of "
                f"their own quarterly histories since {first_year}. The composite is a mean "
                "of percentiles, not itself a percentile of quarters; the composite's own "
                "rank within published quarters is emitted separately as "
                "summary.latest.rank_in_history. ADI states where the present sits against "
                "the nation's own history; it measures current conditions and makes no "
                "forecast."
            ),
            "domains": {d: list(DOMAIN_MEMBERS[d]) for d in DOMAIN_IDS},
            "domain_weight": round(domain_weight(), 6),
            "weight_rule": gates["weights"]["rule"],
            "normalization": (
                "Hazen percentile per series over its entire available quarterly history "
                "in one pass: percentile = (average_rank - 0.5) / n * 100, ties averaged. "
                "One yardstick: no regime split, no baseline window, no winsorization, "
                "no z-cap, no goalpost anchors."
            ),
            "orientation_rule": (
                "Every input's direction field is read from its JSON; lower_is_worse "
                "values are negated before ranking so higher always means more distress. "
                "A missing or unexpected direction aborts the build."
            ),
            "missing_data_rules": [
                "A series-quarter exists only if at least one raw observation is dated within it; the quarterly value is the mean of the observations present. No interpolation, no carry-forward, no imputation.",
                "Domain score at quarter T = mean of member percentiles over members present at T, minimum one member; members_present is published per domain per quarter so dropout is visible.",
                "The composite publishes only when all five domains have at least one present member. A lagging member drops out of its domain's trailing quarter rather than stalling the index, and the score restates when it lands.",
                "A discontinued series stays in history; adding or removing a member is an explicit versioned registry diff, never a runtime fallback.",
                "Known-stale or broken sources are excluded up front: hud_fha_performance (stale, gappy, starts 2017), data/distress NY Fed snapshots (nyfed_household_debt.json carries no values — all 92 observations null; nyfed_delinquency.json shape inverted vs the published NY Fed transition series), short-history behavioral series (no GFC span).",
            ],
            "publication_rule": "publish a quarter only when all five domains have at least one member present",
            "bands": {
                "n_bands": N_BANDS,
                "thresholds": band_thresholds,
                "labels": list(BAND_LABELS),
                "derivation": (
                    "uniform segments, threshold_i = i * 100 / N_BANDS; labels locked by "
                    "editorial decision 2026-06-09, severity ascending with the band number "
                    "(family band-direction rule); thresholds are not calibrated to "
                    "narrative periods"
                ),
                "label_usage": (
                    "labels apply to the national time axis only and always publish "
                    "alongside the literal reading; state and county scores use ranks "
                    "and quintile wording, never these labels"
                ),
            },
            "registry_exclusions": {
                "membership_criterion": MEMBERSHIP_CRITERION,
                "excluded": [
                    {"candidate_id": e.candidate_id, "series": e.series, "reason": e.reason}
                    for e in EXCLUDED
                ],
            },
            "family_coverage_notes": FAMILY_COVERAGE_NOTES,
            "family_relation": (
                "SDI and CDI rank places against other places at one time using the same "
                "five-domain taxonomy, the same Hazen transform, and the same "
                "registry-derived equal-domain mean; ADI ranks the present quarter against "
                "the nation's own quarterly history. ADI is never an average of state or "
                "county scores: cross-sectional percentile ranks are a permutation of 1..n "
                "every period, so their mean is constant by construction and cannot "
                "register aggregate distress."
            ),
            "vintage_honesty": VINTAGE_HONESTY,
            "revision_model": (
                "Each refresh re-ranks every series' full history, so published historical "
                "scores restate by small amounts as the yardstick grows, as trailing "
                "partial quarters complete, and as upstream sources revise. This is the "
                "same revision model as CDI re-ranking counties each refresh."
            ),
            "registry": registry_block,
        },
        # Same dict object the caller passed in: gate records the caller adds
        # to it afterwards also appear in this output.
        "validation": gates,
        "data": rows,
    }


def serialize(output: Dict[str, object]) -> str:
    """Render ``output`` as two-space-indented JSON text ending in a newline.

    Non-ASCII characters are emitted as-is rather than ``\\u``-escaped.
    """
    rendered = json.dumps(output, ensure_ascii=False, indent=2)
    return f"{rendered}\n"


def write_csv(output: Dict[str, object], path: Path) -> None:
    """Write the per-quarter rows of ``output["data"]`` to ``path`` as CSV.

    Columns are quarter, composite, band, band_range, band_label, followed
    by ``<domain>_score`` and ``<domain>_members`` for each id in
    ``DOMAIN_IDS``. Composite and domain scores are zero-padded to two
    decimals.

    Rows go through the stdlib ``csv`` writer, so a cell that contains a
    comma, quote or newline is quoted instead of silently shifting the
    columns; cells without such characters are written exactly as a plain
    comma join would write them. The file is encoded as UTF-8 regardless of
    the platform locale.

    Args:
        output: the assembled index document; only ``output["data"]`` is read.
        path: destination file, overwritten if it exists.
    """
    import csv
    import io

    header = ["quarter", "composite", "band", "band_range", "band_label"]
    for d in DOMAIN_IDS:
        header += [f"{d}_score", f"{d}_members"]

    buffer = io.StringIO()
    # "\n" terminator keeps the line endings handed to write_text unchanged
    # from the previous hand-joined output.
    writer = csv.writer(buffer, lineterminator="\n")
    writer.writerow(header)
    for row in output["data"]:
        cells = [row["quarter"], f"{row['composite']:.2f}", str(row["band"]), row["band_range"], row["band_label"]]  # allow-literal: mirrors the json artifact's already-rounded 2dp values, zero-padding only
        for d in DOMAIN_IDS:
            cells += [f"{row['domains'][d]['score']:.2f}", str(row['domains'][d]['members_present'])]  # allow-literal: json-mirror zero-padding, same as the composite cell
        writer.writerow(cells)
    path.write_text(buffer.getvalue(), encoding="utf-8")


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------


def run_once() -> Tuple[Dict[str, object], str, bool]:
    """Run one complete pass of the engine: load, build, gate, assemble.

    Returns:
        ``(output, serialized, all_pass)`` -- the assembled document, its
        serialized text, and whether every gate recorded for this pass
        reports a truthy ``pass``.
    """
    loaded = load_inputs(REPO_ROOT)
    result = build(loaded)

    # Gates run in this order; insertion order is also the published order.
    gates: Dict[str, Dict[str, object]] = {
        "weights": gate_weights(),
        "labels": gate_labels(),
        "dispositions": gate_dispositions(REPO_ROOT),
        "seam": gate_seam(result),
        "gfc": gate_gfc(result),
        "orientation": gate_orientation(loaded, result),
    }

    output = assemble_output(loaded, result, gates)

    # This gate inspects the assembled methodology, so it can only be added
    # after assembly. The search runs over json.dumps' default rendering of
    # the methodology block.
    methodology_dump = json.dumps(output["methodology"])
    gates["vintage_honesty_present"] = {
        "pass": VINTAGE_HONESTY in methodology_dump,
        "rule": "the methodology block must contain the vintage-honesty statement",
    }

    serialized = serialize(output)
    every_gate_passed = not any(not bool(record["pass"]) for record in gates.values())
    return output, serialized, every_gate_passed


def main() -> int:
    """Build the index twice, gate it, and write the artifacts.

    Runs two full passes, adds a reproducibility gate comparing their
    serialized text, logs every gate verdict, and writes ``adi.json`` and
    ``adi.csv`` to ``OUTPUT_DIR`` and to the public mirror directory only
    when every gate of both passes and the reproducibility gate pass.

    Returns:
        0 when the artifacts were written, 1 when any gate failed (nothing
        is written in that case).
    """
    logging.basicConfig(level=logging.INFO, format="%(message)s")
    out1, ser1, pass1 = run_once()
    _, ser2, pass2 = run_once()  # full second pass, re-read from disk
    reproducible = ser1 == ser2
    out1["validation"]["reproducibility"] = {
        "pass": reproducible,
        "rule": "two full in-process passes (each re-reading inputs from disk) serialize byte-identically; validate_adi.py re-verifies across separate processes",
    }
    # Re-serialize so the written JSON carries the reproducibility gate.
    ser1 = serialize(out1)
    all_pass = pass1 and pass2 and reproducible

    for gate_name, g in out1["validation"].items():
        logger.info("gate %-26s %s", gate_name, "PASS" if g["pass"] else "FAIL")
    if not all_pass:
        logger.error("GATE FAILURE — no output written.")
        return 1

    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    json_path = OUTPUT_DIR / "adi.json"
    csv_path = OUTPUT_DIR / "adi.csv"
    # The JSON is serialized with ensure_ascii=False and contains non-ASCII
    # text, so the encoding is pinned instead of following the locale.
    json_path.write_text(ser1, encoding="utf-8")
    write_csv(out1, csv_path)
    logger.info("wrote %s", json_path)
    logger.info("wrote %s", csv_path)

    # Dual-write the public download mirrors byte-identically so they can
    # never drift from the canonical index (same contract as the retired
    # 4d_e engine's mirror; locked by
    # scripts/tests/test_index_family_adi_cold_review.py::test_public_mirror_byte_parity).
    public_dir = REPO_ROOT / "site" / "public" / "data" / "indexes"
    public_dir.mkdir(parents=True, exist_ok=True)
    (public_dir / "adi.json").write_text(ser1, encoding="utf-8")
    # Copy raw bytes: no decode/encode round trip can alter the mirror.
    (public_dir / "adi.csv").write_bytes(csv_path.read_bytes())
    logger.info("wrote %s", public_dir / "adi.json")
    logger.info("wrote %s", public_dir / "adi.csv")

    s = out1["summary"]
    logger.info("")
    logger.info("published: %s quarters, %s to %s", s["n_published_quarters"], s["first_quarter"], s["last_quarter"])
    logger.info("peak %s = %.2f | trough %s = %.2f", s["peak"]["quarter"], s["peak"]["composite"],
                s["trough"]["quarter"], s["trough"]["composite"])
    milestones = ["2005-Q1", "2006-Q1", "2006-Q4", "2008-Q4", "2009-Q3", "2014-Q4", "2015-Q1",
                  "2020-Q2", "2021-Q1", s["last_quarter"]]
    by_q = {r["quarter"]: r for r in out1["data"]}
    for q in milestones:
        r = by_q.get(q)
        if r is None:
            # Diagnostics only: the artifacts are already written, so an
            # unpublished milestone is reported rather than raising.
            logger.info("%s  (not published)", q)
            continue
        domains = " ".join(f"{d.split('_')[0][:3]}={r['domains'][d]['score']:.1f}" for d in DOMAIN_IDS)  # allow-literal: console diagnostics only, not a published surface
        logger.info("%s  composite=%6.2f band=%s  %s", q, r["composite"], r["band"], domains)
    return 0


if __name__ == "__main__":
    # Script entry: main()'s return value becomes the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)