#!/usr/bin/env python3 """American Distress Index (ADI) — family-v1 production engine (national level). The national member of the three-index family (ADI / SDI / CDI). SDI and CDI rank places against other places at one point in time; ADI ranks the present quarter against every quarter in the nation's own history, using the same five-domain taxonomy, the same Hazen percentile transform, the same orientation rule (every input flagged so higher = more distress), and the same registry-derived equal-domain weighting. Formula (each step implemented below, no fitted constants anywhere): 1. Registry: ten input series under data/indicators/** (the daily-refreshed library with direction metadata), each with a domain assignment. 2. Quarterly aggregation: a series-quarter exists only if at least one raw observation is dated inside it; its value is the arithmetic mean of the observations present. No interpolation, no carry-forward. 3. Orientation: where the JSON direction field says lower_is_worse (savings_rate only), the quarterly values are negated. After this step higher always means more distress. Missing or unexpected direction values abort the build (fail closed). 4. Normalization: one pass per series over its ENTIRE available quarterly history — Hazen percentile (average_rank - 0.5) / n * 100, ties averaged. One yardstick: no regime split, no baseline window, no winsorization, no z-cap. 5. Domain score: unweighted mean of the percentiles of the domain members present at that quarter (at least one required); members_present is published per domain per quarter. 6. Composite: sum over the five domains of domain_weight x domain_score, domain_weight = 1 / len(DOMAIN_IDS), computed from the registry. 7. Publication: a quarter publishes only when all five domains have at least one member present. 8. Scale: the composite is already 0-100 — an average percentile of the nation's own history. No further mapping. 9. Bands: uniform 20-point segments derived as i * 100 / N_BANDS. Band labels locked by editorial decision 2026-06-09: Minimal / Low / Typical / High / Severe, severity ascending with the band number (the family band-direction rule: a bigger band number always means more distress). Labels describe position within the nation's own history; they are not calibrated to narrative periods. The label always publishes alongside the literal reading ("on average, its inputs sit higher than in N% of their own quarterly histories") — the composite is a mean of percentiles, never quoted as a percentile of quarters; the composite's own quarters-rank is emitted separately as rank_in_history. The time axis gets adjectives; places (SDI/CDI) get ranks. 10. Build gates (any failure = no output written): seam, GFC window, orientation perturbation, in-process byte-reproducibility, weights uniformity, band-label lock, vintage-honesty statement presence. Run from the repo root: PYTHONPATH=. python3 scripts/indexes/compute_adi.py Reads only committed files under data/indicators/**; writes data/indexes/adi.json and data/indexes/adi.csv. Deterministic: no network, no wall-clock timestamps (output last_updated = max of input last_updated values), stable ordering, fixed rounding. Two runs on the same inputs are byte-identical (gated here and re-verified across processes by scripts/indexes/validate_adi.py — run the validator with an interpreter that has scipy, e.g. the repo venv; the engine itself is stdlib-only). """ from __future__ import annotations import json import logging import math import sys from dataclasses import dataclass from pathlib import Path from typing import Dict, List, Optional, Sequence, Tuple THIS_DIR = Path(__file__).resolve().parent REPO_ROOT = THIS_DIR.parents[1] OUTPUT_DIR = REPO_ROOT / "data" / "indexes" if str(REPO_ROOT) not in sys.path: sys.path.insert(0, str(REPO_ROOT)) try: from scripts.indexes.family_normalization import hazen_percentiles_series # noqa: E402 from scripts.indicators.schema import date_to_quarter # noqa: E402 except ImportError as exc: # pragma: no cover raise SystemExit( "Cannot import scripts.indexes.family_normalization / " "scripts.indicators.schema. Run from the repo root with PYTHONPATH=. " "— see the module docstring." ) from exc logger = logging.getLogger("adi") # --------------------------------------------------------------------------- # Registry — the only place inputs, domains, and paths are declared. # Orientation is NOT declared here: it is read from each JSON's direction # field at load time and the build fails closed if the field is absent. # --------------------------------------------------------------------------- TAXONOMY_VERSION = "family-v1" DOMAIN_IDS: Tuple[str, ...] = ( "delinquency", "default_legal", "debt_burden", "labor", "safety_net_buffer", ) N_BANDS = 5 # Locked by editorial decision 2026-06-09. Severity ascends with the band # number (family band-direction rule); labels describe position within the # nation's own history. The site always pairs the label with the literal # reading; SDI/CDI never use these labels — places get ranks, not adjectives. BAND_LABELS: Tuple[str, ...] = ("Minimal", "Low", "Typical", "High", "Severe") VALID_DIRECTIONS = ("higher_is_worse", "lower_is_worse") @dataclass(frozen=True) class InputSpec: input_id: str rel_path: str domain: str REGISTRY: Tuple[InputSpec, ...] = ( InputSpec("mortgage_delinquency", "data/indicators/door1_debt/mortgage_delinquency.json", "delinquency"), InputSpec("credit_card_delinquency", "data/indicators/door1_debt/credit_card_delinquency.json", "delinquency"), InputSpec("consumer_loan_delinquency", "data/indicators/door1_debt/consumer_loan_delinquency.json", "delinquency"), # Canonical auto series. data/indicators/door1_debt/auto_delinquency_nyfed.json # carries identical dates and values (verified 2026-06-09) under a separate # registration in scripts/indicators/config.py; it is dispositioned in # registry_exclusions so the domain cannot double-count, and the duplicate # registration retires in the index-launch clean-break unit. InputSpec("auto_loan_delinquency", "data/indicators/door1_debt/auto_loan_delinquency.json", "delinquency"), InputSpec("credit_card_chargeoff", "data/indicators/door1_debt/credit_card_chargeoff.json", "default_legal"), # Lender-loss measure: label "mortgage charge-offs", never "foreclosures". InputSpec("foreclosure_chargeoff_proxy", "data/indicators/door2_legal/foreclosure_chargeoff_proxy.json", "default_legal"), InputSpec("debt_service_ratio", "data/indicators/door3_buffer/debt_service_ratio.json", "debt_burden"), InputSpec("unemployment_rate", "data/indicators/door4_income/unemployment_rate.json", "labor"), InputSpec("initial_claims", "data/indicators/door4_income/initial_claims.json", "labor"), InputSpec("savings_rate", "data/indicators/door3_buffer/savings_rate.json", "safety_net_buffer"), ) DOMAIN_MEMBERS: Dict[str, Tuple[str, ...]] = { d: tuple(s.input_id for s in REGISTRY if s.domain == d) for d in DOMAIN_IDS } # Registry membership criterion (closes cold-review MEDIUM-1, 2026-06-09 # docs/audit/cold_reviews/2026-06-09_index_rebuild_adi.md): a committed series # joins iff (a) it measures a household-distress LEVEL (not a spread or # derived difference), (b) it has quarterly-resolvable history spanning # 2005Q1-present, and (c) within its domain it is neither a population # subset/superset of an existing member nor lead-redundant with one. Every # committed family-fitting candidate is dispositioned here; adding or removing # a member is a versioned registry diff, never a silent edit. MEMBERSHIP_CRITERION = ( "A committed series joins the registry only if (a) it measures a " "household-distress rate or share — not a spread or derived difference, " "and not a nominal dollar or employment-count total whose full-history " "percentile tracks trend growth rather than household condition; " "(b) it has quarterly-resolvable history spanning 2005Q1 to the present; " "and (c) within its domain it is neither a population subset or superset " "of an existing member, nor a data-identical duplicate, nor " "level-redundant with one, nor distorted by a reporting-regime break. " "Every committed series under data/indicators/door*/ with a direction " "field and that span is either a registry member or carries a written " "disposition in registry_exclusions — enforced by the dispositions build " "gate, which fails closed when a new candidate appears undispositioned. " "Membership changes are versioned registry diffs." ) @dataclass(frozen=True) class ExcludedSpec: candidate_id: str series: str reason: str # Measured materiality of the four rankable rate exclusions (recorded in the # 2026-06-09 cold review): admitting DRCCLOBS moves 2025-Q4 from 44.62 to # 45.92; admitting all four moves it to 47.51; peak/trough/milestone story # invariant under every combination. The omissions tilt the current reading # DOWN, so exclusion is conservative rather than narrative-serving. # Every disposition fact below is verified against the committed files # (correlations computed on the committed quarterly series, 106 common # quarters, 2026-06-09). EXCLUDED: Tuple[ExcludedSpec, ...] = ( ExcludedSpec( "small_bank_cc_delinquency", "FRED DRCCLOBS", "population subset: the member DRCCLACBS covers credit-card " "delinquency at ALL commercial banks including the other-banks " "subset; admitting both double-weights card delinquency.", ), ExcludedSpec( "continuing_claims", "FRED CCSA", "mechanically downstream of the member initial_claims: continuing " "claims count prior initial claimants still drawing benefits " "(contemporaneous r=0.92 on the committed quarterly sample); the " "labor domain already carries the flow member (initial_claims) and " "the stock member (unemployment_rate).", ), ExcludedSpec( "u6_underemployment", "FRED U6RATE", "definitional superset: U-6 contains the U-3 unemployed count that " "the member unemployment_rate measures; admitting both double-counts " "every unemployed worker.", ), ExcludedSpec( "mortgage_debt_service", "FRED MDSP", "strict component of the member debt_service_ratio (TDSP = mortgage " "MDSP + consumer CDSP); admitting both double-counts mortgage debt " "service.", ), ExcludedSpec( "bank_delinquency_spread", "derived spread", "a derived difference between two bank delinquency rates, not a " "household-distress level; fails criterion (a) and has no stable " "distress orientation.", ), ExcludedSpec( "auto_delinquency_nyfed", "NY Fed CCP", "data-identical duplicate of the member auto_loan_delinquency " "(identical dates and values on all 92 quarters, verified " "2026-06-09) under a separate registration in " "scripts/indicators/config.py; the duplicate registration retires " "in the index-launch clean-break unit.", ), ExcludedSpec( "serious_delinquency_rate", "NY Fed CCP", "population superset: the all-product 90+ aggregate contains the " "product-level delinquency the domain's members measure " "individually; admitting both double-counts every delinquent " "balance.", ), ExcludedSpec( "total_delinquency_rate", "NY Fed CCP", "population superset: the all-product 30+ aggregate contains the " "product-level delinquency the domain's members measure " "individually; same class as serious_delinquency_rate.", ), ExcludedSpec( "student_loan_delinquency", "NY Fed CCP", "reporting-regime break: the federal payment moratorium held " "reported delinquency near zero by administrative action and the " "2025 resumption of credit-bureau reporting restored it as a step, " "so the series measures reporting policy as much as household " "behavior across the sample; excluded until a regime-consistent " "treatment exists (a future versioned registry diff).", ), ExcludedSpec( "parttime_economic_reasons", "BLS LNS12032194", "level-redundant with the member unemployment_rate (r=0.90 on the " "committed quarterly sample); the labor domain carries one stock " "and one flow member by design.", ), ExcludedSpec( "jolts_quits", "BLS JTSQUR", "measures labor-market churn and worker confidence, not a " "household-distress rate; fails criterion (a).", ), ExcludedSpec( "nonfarm_payrolls", "FRED PAYEMS", "a trend-dominated employment count that scales with population; " "its full-history percentile tracks growth, not household " "condition; fails criterion (a).", ), ExcludedSpec( "atlanta_fed_wage_q1", "Atlanta Fed wage tracker", "a wage-growth rate, not a household-distress rate; fails " "criterion (a).", ), ExcludedSpec( "wage_inflation_gap", "Bank of America Institute", "a derived spread (wage growth minus inflation), not a " "household-distress level; fails criterion (a), same class as " "bank_delinquency_spread.", ), ExcludedSpec( "bankruptcy_chargeoff_proxy", "FRED CORALACBN", "population superset and scope mismatch: charge-offs on ALL loans " "contain the member card and mortgage charge-offs and add " "commercial lending, which is outside household distress.", ), ExcludedSpec( "consumer_credit_outstanding", "FRED TOTALSL", "nominal dollar total: its full-history percentile tracks trend " "growth in population, income, and prices, not household " "condition; fails criterion (a).", ), ExcludedSpec( "credit_card_debt_total", "NY Fed CCP", "nominal dollar total; same class as consumer_credit_outstanding.", ), ExcludedSpec( "heloc_balance", "NY Fed CCP", "nominal dollar total; same class as consumer_credit_outstanding.", ), ExcludedSpec( "household_debt_total", "NY Fed CCP", "nominal dollar total; same class as consumer_credit_outstanding.", ), ExcludedSpec( "revolving_credit", "FRED REVOLSL", "nominal dollar total; same class as consumer_credit_outstanding.", ), ExcludedSpec( "mortgage_origination_volume", "NY Fed CCP", "nominal flow volume driven by the rate cycle and trend growth; " "low origination can mean tight credit or weak demand, so it has " "no stable distress orientation; fails criterion (a).", ), ) VINTAGE_HONESTY = ( "Inputs are revised series (Federal Reserve Board bank-condition rates via " "FRED, NY Fed Consumer Credit Panel, BLS, DOL, and BEA series via FRED). " "Historical ADI values are computed on today's revised vintages, not on the " "data as first published. Every run restates the full history: the percentile " "yardstick grows by one quarter per refresh, trailing partial quarters fill " "in as observations land, and upstream revisions re-rank past quarters. No " "out-of-sample claim is made for any historical reading, including the " "2008-2010 stretch." ) FAMILY_COVERAGE_NOTES = { "default_legal": ( "Carries the two household charge-off series only. The legal leg " "(bankruptcy filings) is absent nationally because committed US Courts " "quarterly coverage has a 2015Q2-2023Q2 hole; after the F-2 quarterly " "backfill it joins as an explicit registry diff. The mortgage leg " "(CORSFRMACBS) measures lender-loss foreclosures and reads near its " "floor in high-home-equity cycles while foreclosure filings rise, so " "terminal housing distress is understated; label it 'mortgage " "charge-offs', never 'foreclosures'." ), "safety_net_buffer": ( "Carries the private-buffer member only (personal saving rate, " "inverted). Committed public-assistance data lacks national history " "spanning 2005 forward (the committed SNAP file starts 2019-10); a " "long national assistance series joins later as a registry diff." ), "debt_burden": ( "Single member (household debt service ratio, FRED TDSP). The committed " "file starts 2005Q1, which binds the index start and the yardstick " "span. FRED publishes TDSP back to 1980; extending the pull to the " "full history is a planned versioned registry diff before site " "cutover (method unchanged)." ), } # --------------------------------------------------------------------------- # Loading and aggregation # --------------------------------------------------------------------------- @dataclass(frozen=True) class LoadedInput: spec: InputSpec direction: str last_updated: str source: str fred_series_id: Optional[str] name: str unit: str quarterly_raw: Dict[str, float] # quarter key -> mean of in-quarter observations @dataclass(frozen=True) class Perturbation: """A raw-scale perturbation for the orientation gate. mode 'unit' : one raw unit in the distress direction per the JSON direction field (+1 for higher_is_worse, -1 for lower_is_worse). mode 'extreme' : the raw value is set one unit beyond the series' worst-ever raw value in the distress direction, which must move the quarter to (or keep it at) the top rank. The sign comes from the direction field read out of the JSON — not from the pipeline's own orientation step — so a wiring error in orientation consumption makes this gate fail. """ input_id: str quarter: str mode: str # "unit" | "extreme" def quarter_index(q: str) -> int: year, qq = q.split("-Q") return int(year) * 4 + int(qq) - 1 def load_inputs(repo_root: Path) -> Dict[str, LoadedInput]: loaded: Dict[str, LoadedInput] = {} for spec in REGISTRY: raw = json.loads((repo_root / spec.rel_path).read_text()) direction = raw.get("direction") if direction not in VALID_DIRECTIONS: raise SystemExit( f"FAIL CLOSED: {spec.input_id} direction field is {direction!r}; " f"expected one of {VALID_DIRECTIONS}. Refusing to build." ) by_quarter: Dict[str, List[float]] = {} for obs in raw["data"]: value = obs.get("value") if value is None: continue by_quarter.setdefault(date_to_quarter(obs["date"]), []).append(float(value)) if not by_quarter: raise SystemExit(f"FAIL CLOSED: {spec.input_id} has zero usable observations.") loaded[spec.input_id] = LoadedInput( spec=spec, direction=direction, last_updated=str(raw.get("last_updated", "")), source=str(raw.get("source", "")), fred_series_id=raw.get("fred_series_id"), name=str(raw.get("name", spec.input_id)), unit=str(raw.get("unit", "")), quarterly_raw={q: sum(vs) / len(vs) for q, vs in by_quarter.items()}, ) return loaded # --------------------------------------------------------------------------- # Build: orientation -> percentiles -> domain scores -> composite # --------------------------------------------------------------------------- @dataclass class BuildResult: published_quarters: List[str] composite: Dict[str, float] # quarter -> composite (full precision) domain_scores: Dict[str, Dict[str, float]] # quarter -> domain -> score members_present: Dict[str, Dict[str, int]] # quarter -> domain -> count member_pct: Dict[str, Dict[str, float]] # input_id -> quarter -> percentile oriented: Dict[str, Dict[str, float]] # input_id -> quarter -> oriented value def domain_weight() -> float: return 1.0 / len(DOMAIN_IDS) def member_weight(domain: str) -> float: return 1.0 / len(DOMAIN_MEMBERS[domain]) def build(loaded: Dict[str, LoadedInput], perturb: Optional[Perturbation] = None) -> BuildResult: # Step 2 output (quarterly_raw) -> optional perturbation -> Step 3 orientation oriented: Dict[str, Dict[str, float]] = {} for input_id, li in loaded.items(): quarterly = dict(li.quarterly_raw) if perturb is not None and perturb.input_id == input_id: if perturb.quarter not in quarterly: raise ValueError(f"perturbation quarter {perturb.quarter} absent from {input_id}") if perturb.mode == "unit": delta = 1.0 if li.direction == "higher_is_worse" else -1.0 quarterly[perturb.quarter] = quarterly[perturb.quarter] + delta elif perturb.mode == "extreme": if li.direction == "higher_is_worse": quarterly[perturb.quarter] = max(quarterly.values()) + 1.0 else: quarterly[perturb.quarter] = min(quarterly.values()) - 1.0 else: # pragma: no cover raise ValueError(f"unknown perturbation mode {perturb.mode!r}") sign = -1.0 if li.direction == "lower_is_worse" else 1.0 oriented[input_id] = {q: sign * v for q, v in quarterly.items()} # Step 4: one full-history Hazen pass per series member_pct: Dict[str, Dict[str, float]] = {} for input_id, series in oriented.items(): quarters = sorted(series) # 'YYYY-Qn' sorts chronologically pcts = hazen_percentiles_series([series[q] for q in quarters]) member_pct[input_id] = dict(zip(quarters, pcts)) # Steps 5-7: domain scores, composite, publication rule all_quarters = sorted({q for series in member_pct.values() for q in series}) w_domain = domain_weight() published: List[str] = [] composite: Dict[str, float] = {} domain_scores: Dict[str, Dict[str, float]] = {} members_present: Dict[str, Dict[str, int]] = {} for q in all_quarters: per_domain: Dict[str, float] = {} per_domain_count: Dict[str, int] = {} for d in DOMAIN_IDS: present = [m for m in DOMAIN_MEMBERS[d] if q in member_pct[m]] if present: per_domain[d] = sum(member_pct[m][q] for m in present) / len(present) per_domain_count[d] = len(present) if len(per_domain) == len(DOMAIN_IDS): published.append(q) composite[q] = sum(w_domain * per_domain[d] for d in DOMAIN_IDS) domain_scores[q] = per_domain members_present[q] = per_domain_count return BuildResult(published, composite, domain_scores, members_present, member_pct, oriented) # --------------------------------------------------------------------------- # Gates — any failure means no output is written # --------------------------------------------------------------------------- def quantile_linear(values: Sequence[float], p: float) -> float: """Linearly interpolated quantile (the numpy default method), implemented here so the threshold derivation is visible in this file.""" s = sorted(values) n = len(s) if n == 1: return s[0] h = (n - 1) * p lo = math.floor(h) if lo + 1 >= n: return s[-1] return s[lo] + (s[lo + 1] - s[lo]) * (h - lo) def gate_weights() -> Dict[str, object]: w_d = domain_weight() ok = abs(w_d * len(DOMAIN_IDS) - 1.0) < 1e-12 member_ws = {} for d in DOMAIN_IDS: w_m = member_weight(d) member_ws[d] = w_m ok = ok and abs(w_m * len(DOMAIN_MEMBERS[d]) - 1.0) < 1e-12 # Total of domain_weight x member_weight over every (domain, member) cell # is len(members)*w_m*w_d summed over domains = sum of w_d = 1. total = sum(domain_weight() * member_weight(d) * len(DOMAIN_MEMBERS[d]) for d in DOMAIN_IDS) ok = ok and abs(total - 1.0) < 1e-12 return { "pass": ok, "domain_weight": w_d, "member_weights": member_ws, "total_weight": total, "rule": "domain_weight = 1/len(DOMAIN_IDS); member_weight = 1/len(domain members); zero hand-typed weight literals", } def gate_seam(result: BuildResult) -> Dict[str, object]: qs = result.published_quarters deltas = [] for a, b in zip(qs, qs[1:]): if quarter_index(b) - quarter_index(a) == 1: deltas.append(abs(result.composite[b] - result.composite[a])) threshold = quantile_linear(deltas, 0.95) seam = abs(result.composite["2015-Q1"] - result.composite["2014-Q4"]) return { "pass": seam <= threshold, "seam_abs_delta_2014Q4_to_2015Q1": round(seam, 4), "threshold_p95_of_all_abs_deltas": round(threshold, 4), "median_abs_delta": round(quantile_linear(deltas, 0.5), 4), "n_deltas": len(deltas), "rule": "|ADI(2015-Q1) - ADI(2014-Q4)| must not exceed the 95th percentile of all quarter-over-quarter |delta|", } def gate_gfc(result: BuildResult) -> Dict[str, object]: qs = result.published_quarters # Published range must be contiguous for rolling windows to be meaningful. contiguous = all(quarter_index(b) - quarter_index(a) == 1 for a, b in zip(qs, qs[1:])) window = 8 best_start, best_mean = None, -1.0 for i in range(len(qs) - window + 1): m = sum(result.composite[q] for q in qs[i : i + window]) / window if m > best_mean: best_mean, best_start = m, i start_q = qs[best_start] end_q = qs[best_start + window - 1] in_gfc = quarter_index("2008-Q1") <= quarter_index(start_q) <= quarter_index("2010-Q4") worst_quarters = sorted(qs, key=lambda q: result.composite[q], reverse=True)[:12] return { "pass": contiguous and in_gfc, "published_range_contiguous": contiguous, "worst_8q_window": f"{start_q} to {end_q}", "worst_8q_window_mean": round(best_mean, 2), "worst_12_quarters": worst_quarters, "rule": "the worst 8-quarter rolling mean must begin within 2008-2010", } def gate_orientation(loaded: Dict[str, LoadedInput], baseline: BuildResult) -> Dict[str, object]: rows = [] all_pass = True for spec in REGISTRY: input_id = spec.input_id candidates = [q for q in baseline.published_quarters if q in loaded[input_id].quarterly_raw] q_star = candidates[-1] base_c = baseline.composite[q_star] per_input = {"input_id": input_id, "direction": loaded[input_id].direction, "quarter": q_star, "baseline_composite": round(base_c, 4)} for mode in ("unit", "extreme"): perturbed = build(loaded, Perturbation(input_id, q_star, mode)) pert_c = perturbed.composite[q_star] if mode == "unit": ok = pert_c >= base_c - 1e-9 else: # Strict increase expected unless the quarter already held the # series' unique top rank before the perturbation. series = baseline.oriented[input_id] v_star = series[q_star] top = max(series.values()) already_unique_top = v_star == top and sum(1 for v in series.values() if v == top) == 1 ok = (pert_c >= base_c - 1e-9) if already_unique_top else (pert_c > base_c + 1e-9) per_input[f"{mode}_composite"] = round(pert_c, 4) per_input[f"{mode}_pass"] = ok all_pass = all_pass and ok rows.append(per_input) return { "pass": all_pass, "per_input": rows, "rule": ( "for every input, perturbing its latest published quarter in the distress " "direction (sign read from the JSON direction field, applied on the raw " "scale) must not decrease the composite at that quarter; the to-worst-ever " "perturbation must strictly increase it unless the quarter already held the " "series' unique top rank" ), } # --------------------------------------------------------------------------- # Output assembly # --------------------------------------------------------------------------- def band_for(composite_value: float) -> Tuple[int, str, str]: width = 100.0 / N_BANDS band = min(int(composite_value // width) + 1, N_BANDS) low = round((band - 1) * width) high = round(band * width) return band, f"{low}-{high}", BAND_LABELS[band - 1] def enumerate_candidates(repo_root: Path) -> List[str]: """Every committed series the membership criterion reaches: any JSON under data/indicators/door*/ with a valid direction field and quarterly history spanning 2005-Q1 through at least 2024-Q4. Lexicographic comparison is chronological for fixed-width 'YYYY-Qn' keys.""" candidates: List[str] = [] for path in sorted(repo_root.glob("data/indicators/door*/*.json")): try: raw = json.loads(path.read_text()) except (OSError, json.JSONDecodeError): continue if raw.get("direction") not in VALID_DIRECTIONS: continue quarters = sorted({ date_to_quarter(obs["date"]) for obs in raw.get("data", []) if obs.get("value") is not None and obs.get("date") }) if quarters and quarters[0] <= "2005-Q1" and quarters[-1] >= "2024-Q4": candidates.append(path.stem) return candidates def gate_dispositions(repo_root: Path) -> Dict[str, object]: """Fail closed when any committed candidate is neither a registry member nor dispositioned in registry_exclusions — the published exhaustiveness claim is enforced here, not asserted.""" universe = set(enumerate_candidates(repo_root)) members = {s.input_id for s in REGISTRY} excluded = {e.candidate_id for e in EXCLUDED} undispositioned = sorted(universe - members - excluded) stale_exclusions = sorted(excluded - universe) overlap = sorted(members & excluded) return { "pass": not undispositioned and not overlap, "n_candidates": len(universe), "n_members": len(members & universe), "n_excluded": len(excluded & universe), "undispositioned": undispositioned, "member_and_excluded_overlap": overlap, "exclusions_not_currently_in_universe": stale_exclusions, "rule": ( "every committed data/indicators/door*/ series with a direction " "field and quarterly history spanning 2005Q1-2024Q4+ is a registry " "member or carries a written disposition; an undispositioned " "candidate or a member-exclusion overlap fails the build" ), } def gate_labels() -> Dict[str, object]: ok = ( len(BAND_LABELS) == N_BANDS and BAND_LABELS == ("Minimal", "Low", "Typical", "High", "Severe") and band_for(0.0)[0] == 1 and band_for(99.99)[0] == N_BANDS and band_for(0.0)[2] == "Minimal" and band_for(99.99)[2] == "Severe" ) return { "pass": ok, "labels": list(BAND_LABELS), "rule": ( "exactly N_BANDS labels, locked to the 2026-06-09 editorial decision, " "severity ascending with the band number (family band-direction rule)" ), } def assemble_output(loaded: Dict[str, LoadedInput], result: BuildResult, gates: Dict[str, Dict[str, object]]) -> Dict[str, object]: last_updated = max(li.last_updated for li in loaded.values()) band_thresholds = [round(i * 100.0 / N_BANDS) for i in range(1, N_BANDS)] registry_block = [] for spec in REGISTRY: li = loaded[spec.input_id] qs = sorted(li.quarterly_raw) registry_block.append({ "input_id": spec.input_id, "name": li.name, "path": spec.rel_path, "domain": spec.domain, "direction": li.direction, "fred_series_id": li.fred_series_id, "source": li.source, "unit": li.unit, "n_quarters": len(qs), "first_quarter": qs[0], "last_quarter": qs[-1], "last_updated": li.last_updated, "member_weight_within_domain": round(member_weight(spec.domain), 6), }) rows = [] for q in result.published_quarters: band, band_range, band_label = band_for(result.composite[q]) rows.append({ "quarter": q, "composite": round(result.composite[q], 2), "band": band, "band_range": band_range, "band_label": band_label, "domains": { d: { "score": round(result.domain_scores[q][d], 2), "members_present": result.members_present[q][d], } for d in DOMAIN_IDS }, "members": { spec.input_id: round(result.member_pct[spec.input_id][q], 2) for spec in REGISTRY if q in result.member_pct[spec.input_id] }, }) peak_q = max(result.published_quarters, key=lambda q: result.composite[q]) trough_q = min(result.published_quarters, key=lambda q: result.composite[q]) latest_q = result.published_quarters[-1] first_year = result.published_quarters[0].split("-")[0] latest_rank_pct = hazen_percentiles_series( [result.composite[q] for q in result.published_quarters] )[-1] return { "series_id": "adi", "name": "American Distress Index (ADI)", "taxonomy_version": TAXONOMY_VERSION, "level": "national", "frequency": "quarterly", "source": "American Default Research, computed from Federal Reserve Board, NY Fed CCP, BLS, DOL, and BEA series (attribution per input in methodology.registry)", "last_updated": last_updated, "summary": { "n_published_quarters": len(result.published_quarters), "first_quarter": result.published_quarters[0], "last_quarter": latest_q, "peak": {"quarter": peak_q, "composite": round(result.composite[peak_q], 2)}, "trough": {"quarter": trough_q, "composite": round(result.composite[trough_q], 2)}, "latest": { "quarter": latest_q, "composite": round(result.composite[latest_q], 2), "band": band_for(result.composite[latest_q])[0], "band_label": band_for(result.composite[latest_q])[2], "reading": ( f"On average, its inputs sit higher than in " f"{round(result.composite[latest_q])}% of their own quarterly " f"histories since {first_year}" ), "rank_in_history": { "hazen_percentile": round(latest_rank_pct, 1), "reading": ( f"The composite itself sits higher than " f"{round(latest_rank_pct)}% of all published quarters " f"since {first_year}" ), "rule": ( "Hazen percentile of the latest composite within the " "published composite series; distinct from the composite " "value, which is a mean of input percentiles, not itself " "a percentile of quarters" ), }, "domains": {d: round(result.domain_scores[latest_q][d], 2) for d in DOMAIN_IDS}, }, }, "methodology": { "reading": ( "ADI(t) is the mean of five domain scores, each the mean of its members' " "Hazen percentiles within their own full quarterly history. A composite of " "62 means the index's inputs, on average, sit higher than in 62 percent of " f"their own quarterly histories since {first_year}. The composite is a mean " "of percentiles, not itself a percentile of quarters; the composite's own " "rank within published quarters is emitted separately as " "summary.latest.rank_in_history. ADI states where the present sits against " "the nation's own history; it measures current conditions and makes no " "forecast." ), "domains": {d: list(DOMAIN_MEMBERS[d]) for d in DOMAIN_IDS}, "domain_weight": round(domain_weight(), 6), "weight_rule": gates["weights"]["rule"], "normalization": ( "Hazen percentile per series over its entire available quarterly history " "in one pass: percentile = (average_rank - 0.5) / n * 100, ties averaged. " "One yardstick: no regime split, no baseline window, no winsorization, " "no z-cap, no goalpost anchors." ), "orientation_rule": ( "Every input's direction field is read from its JSON; lower_is_worse " "values are negated before ranking so higher always means more distress. " "A missing or unexpected direction aborts the build." ), "missing_data_rules": [ "A series-quarter exists only if at least one raw observation is dated within it; the quarterly value is the mean of the observations present. No interpolation, no carry-forward, no imputation.", "Domain score at quarter T = mean of member percentiles over members present at T, minimum one member; members_present is published per domain per quarter so dropout is visible.", "The composite publishes only when all five domains have at least one present member. A lagging member drops out of its domain's trailing quarter rather than stalling the index, and the score restates when it lands.", "A discontinued series stays in history; adding or removing a member is an explicit versioned registry diff, never a runtime fallback.", "Known-stale or broken sources are excluded up front: hud_fha_performance (stale, gappy, starts 2017), data/distress NY Fed snapshots (nyfed_household_debt.json carries no values — all 92 observations null; nyfed_delinquency.json shape inverted vs the published NY Fed transition series), short-history behavioral series (no GFC span).", ], "publication_rule": "publish a quarter only when all five domains have at least one member present", "bands": { "n_bands": N_BANDS, "thresholds": band_thresholds, "labels": list(BAND_LABELS), "derivation": ( "uniform segments, threshold_i = i * 100 / N_BANDS; labels locked by " "editorial decision 2026-06-09, severity ascending with the band number " "(family band-direction rule); thresholds are not calibrated to " "narrative periods" ), "label_usage": ( "labels apply to the national time axis only and always publish " "alongside the literal reading; state and county scores use ranks " "and quintile wording, never these labels" ), }, "registry_exclusions": { "membership_criterion": MEMBERSHIP_CRITERION, "excluded": [ {"candidate_id": e.candidate_id, "series": e.series, "reason": e.reason} for e in EXCLUDED ], }, "family_coverage_notes": FAMILY_COVERAGE_NOTES, "family_relation": ( "SDI and CDI rank places against other places at one time using the same " "five-domain taxonomy, the same Hazen transform, and the same " "registry-derived equal-domain mean; ADI ranks the present quarter against " "the nation's own quarterly history. ADI is never an average of state or " "county scores: cross-sectional percentile ranks are a permutation of 1..n " "every period, so their mean is constant by construction and cannot " "register aggregate distress." ), "vintage_honesty": VINTAGE_HONESTY, "revision_model": ( "Each refresh re-ranks every series' full history, so published historical " "scores restate by small amounts as the yardstick grows, as trailing " "partial quarters complete, and as upstream sources revise. This is the " "same revision model as CDI re-ranking counties each refresh." ), "registry": registry_block, }, "validation": gates, "data": rows, } def serialize(output: Dict[str, object]) -> str: return json.dumps(output, indent=2, ensure_ascii=False) + "\n" def write_csv(output: Dict[str, object], path: Path) -> None: header = ["quarter", "composite", "band", "band_range", "band_label"] for d in DOMAIN_IDS: header += [f"{d}_score", f"{d}_members"] lines = [",".join(header)] for row in output["data"]: cells = [row["quarter"], f"{row['composite']:.2f}", str(row["band"]), row["band_range"], row["band_label"]] # allow-literal: mirrors the json artifact's already-rounded 2dp values, zero-padding only for d in DOMAIN_IDS: cells += [f"{row['domains'][d]['score']:.2f}", str(row['domains'][d]['members_present'])] # allow-literal: json-mirror zero-padding, same as the composite cell lines.append(",".join(cells)) path.write_text("\n".join(lines) + "\n") # --------------------------------------------------------------------------- # Main # --------------------------------------------------------------------------- def run_once() -> Tuple[Dict[str, object], str, bool]: """One full pass: load from disk, build, gate. Returns (output, serialized, all_pass).""" loaded = load_inputs(REPO_ROOT) result = build(loaded) gates: Dict[str, Dict[str, object]] = {} gates["weights"] = gate_weights() gates["labels"] = gate_labels() gates["dispositions"] = gate_dispositions(REPO_ROOT) gates["seam"] = gate_seam(result) gates["gfc"] = gate_gfc(result) gates["orientation"] = gate_orientation(loaded, result) output = assemble_output(loaded, result, gates) gates["vintage_honesty_present"] = { "pass": VINTAGE_HONESTY in json.dumps(output["methodology"]), "rule": "the methodology block must contain the vintage-honesty statement", } serialized = serialize(output) all_pass = all(bool(g["pass"]) for g in gates.values()) return output, serialized, all_pass def main() -> int: logging.basicConfig(level=logging.INFO, format="%(message)s") out1, ser1, pass1 = run_once() out2, ser2, pass2 = run_once() # full second pass, re-read from disk reproducible = ser1 == ser2 out1["validation"]["reproducibility"] = { "pass": reproducible, "rule": "two full in-process passes (each re-reading inputs from disk) serialize byte-identically; validate_adi.py re-verifies across separate processes", } ser1 = serialize(out1) all_pass = pass1 and pass2 and reproducible for gate_name, g in out1["validation"].items(): logger.info("gate %-26s %s", gate_name, "PASS" if g["pass"] else "FAIL") if not all_pass: logger.error("GATE FAILURE — no output written.") return 1 OUTPUT_DIR.mkdir(parents=True, exist_ok=True) json_path = OUTPUT_DIR / "adi.json" csv_path = OUTPUT_DIR / "adi.csv" json_path.write_text(ser1) write_csv(out1, csv_path) logger.info("wrote %s", json_path) logger.info("wrote %s", csv_path) # Dual-write the public download mirrors byte-identically so they can # never drift from the canonical index (same contract as the retired # 4d_e engine's mirror; locked by # scripts/tests/test_index_family_adi_cold_review.py::test_public_mirror_byte_parity). public_dir = REPO_ROOT / "site" / "public" / "data" / "indexes" public_dir.mkdir(parents=True, exist_ok=True) (public_dir / "adi.json").write_text(ser1) (public_dir / "adi.csv").write_text(csv_path.read_text()) logger.info("wrote %s", public_dir / "adi.json") logger.info("wrote %s", public_dir / "adi.csv") s = out1["summary"] logger.info("") logger.info("published: %s quarters, %s to %s", s["n_published_quarters"], s["first_quarter"], s["last_quarter"]) logger.info("peak %s = %.2f | trough %s = %.2f", s["peak"]["quarter"], s["peak"]["composite"], s["trough"]["quarter"], s["trough"]["composite"]) milestones = ["2005-Q1", "2006-Q1", "2006-Q4", "2008-Q4", "2009-Q3", "2014-Q4", "2015-Q1", "2020-Q2", "2021-Q1", s["last_quarter"]] by_q = {r["quarter"]: r for r in out1["data"]} for q in milestones: r = by_q[q] domains = " ".join(f"{d.split('_')[0][:3]}={r['domains'][d]['score']:.1f}" for d in DOMAIN_IDS) # allow-literal: console diagnostics only, not a published surface logger.info("%s composite=%6.2f band=%s %s", q, r["composite"], r["band"], domains) return 0 if __name__ == "__main__": raise SystemExit(main())