#!/usr/bin/env python3
"""
Optimized temporal phase-randomization null test
for TPW return diagnostics. The null distribution is built
from random circular shifts of the event-rate envelope.

Statistical meaning identical to baseline version.
Optimized for Apple M3 Ultra / large unified memory.

Inputs:
- site_sampling_timeseries.csv
- model_event_rate_envelope.csv

Outputs:
- temporal_null_distributions.csv
- temporal_null_summary.csv
"""

import numpy as np
import pandas as pd
from tqdm import trange

# ============================================================
# PARAMETERS
# ============================================================

# Number of random circular shifts drawn for the null distribution.
N_ITER = 20000
# Seed for NumPy's global RNG so repeated runs draw the same shifts.
SEED = 123
np.random.seed(SEED)

# ============================================================
# LOAD DATA
# ============================================================

df_sites = pd.read_csv("site_sampling_timeseries.csv")

# Order the envelope by ascending time before pulling out the proxy column.
df_event = pd.read_csv("model_event_rate_envelope.csv").sort_values("time_ka")
event_rate = df_event["event_rate_proxy"].to_numpy().astype(np.float32)

# Helper: extract site series
def get_series(group, col):
    """Return one column of a site group as a time-ordered float32 array.

    Reads the module-level ``df_sites`` frame.

    Args:
        group: Value matched against the ``"group"`` column.
        col: Name of the column to extract.

    Returns:
        The ``col`` values of the matching rows, ordered by ascending
        ``"time_ka"`` and cast to ``np.float32``.
    """
    in_group = df_sites["group"] == group
    ordered = df_sites.loc[in_group].sort_values("time_ka")
    return ordered[col].values.astype(np.float32)

# Four diagnostics: two site groups x two per-site columns.
series = {
    "homo_dzdt": get_series("Homo", "mean_abs_dzdt"),
    "homo_var":  get_series("Homo", "var_zero_contour_distance_deg2"),
    "civ_dzdt":  get_series("Civilization", "mean_abs_dzdt"),
    "civ_var":   get_series("Civilization", "var_zero_contour_distance_deg2"),
}

# Every input is cut to the shortest length so the arrays can be multiplied
# element-wise.
# NOTE(review): this keeps the first L samples of each time-sorted array and
# therefore assumes all inputs share the same time grid -- confirm.
lengths = {"event_rate": len(event_rate)}
lengths.update({name: len(values) for name, values in series.items()})
L = min(lengths.values())

# Fail fast with a readable message. An empty input (e.g. a group label that
# matches no rows) would otherwise only surface later as a ValueError from
# np.random.randint(0, 0, ...), after NaN warnings from the normalization.
if L == 0:
    empty = ", ".join(name for name, n in lengths.items() if n == 0)
    raise ValueError(
        f"Cannot build temporal null: no samples found for {empty}. "
        "Check the 'group' labels and the input CSV contents."
    )

event_rate = event_rate[:L]

for k in series:
    series[k] = series[k][:L]

# ============================================================
# NORMALIZE SERIES ONCE
# ============================================================

def zscore(x):
    """Standardize ``x`` to zero mean and unit population standard deviation.

    Args:
        x: Numeric array (anything ``np.asarray`` accepts).

    Returns:
        ``(x - mean) / std`` with ``std`` computed with NumPy's default
        ``ddof=0``.  A zero-variance input returns an all-zero array instead
        of NaN (0/0): a constant series carries no alignment signal, and NaN
        would propagate into the alignment metric and make the ``>=``
        comparison in the p-value step report 0.0.
    """
    x = np.asarray(x)
    centered = x - x.mean()
    spread = x.std()
    if spread == 0:
        # Constant input: every centered value is already exactly zero.
        return np.zeros_like(centered)
    return centered / spread

# Standardize each input a single time; the normalized arrays are reused
# unchanged by every later step.
event_norm = zscore(event_rate)

series_norm = dict(zip(series, map(zscore, series.values())))

# ============================================================
# OBSERVED ALIGNMENT METRICS
# ============================================================

# Alignment = mean of the element-wise product of the two standardized
# arrays, evaluated at zero lag.
observed = {
    name: np.mean(np.multiply(normed, event_norm))
    for name, normed in series_norm.items()
}

# ============================================================
# TEMPORAL PHASE RANDOMIZATION
# ============================================================

# Pre-generate random circular shifts
# Draw N_ITER circular shifts in [0, L) from the seeded global RNG.  The draw
# is unchanged, so the resulting null distribution is identical to evaluating
# every iteration separately.
shifts = np.random.randint(0, L, size=N_ITER)

keys = list(series_norm.keys())

# Only L distinct shifts exist, so repeated draws would recompute the same
# roll and the same means.  Evaluate each distinct shift once and copy the
# result to every iteration that drew it.
unique_shifts, shift_index = np.unique(shifts, return_inverse=True)
per_shift = np.zeros((len(unique_shifts), len(keys)), dtype=np.float32)

# The progress bar now counts distinct shifts rather than iterations.
for u in trange(len(unique_shifts), desc="Temporal null"):
    rolled = np.roll(event_norm, unique_shifts[u])
    for j, k in enumerate(keys):
        per_shift[u, j] = np.mean(series_norm[k] * rolled)

# Expand back to one row per iteration, in the original draw order.
records = per_shift[shift_index]

# ============================================================
# WRITE NULL DISTRIBUTIONS
# ============================================================

# One column per diagnostic (named by its key), one row per null iteration.
df_null = pd.DataFrame(data=records, columns=keys)
df_null.to_csv(
    "temporal_null_distributions.csv",
    index=False,
)

# ============================================================
# EMPIRICAL P-VALUES
# ============================================================

def pval(obs, null):
    """Return the one-sided empirical p-value of ``obs`` against ``null``.

    Args:
        obs: Observed statistic (scalar).
        null: Array of statistics drawn under the null hypothesis.

    Returns:
        The fraction of ``null`` values greater than or equal to ``obs``.
        Returns NaN when the p-value is undefined: ``null`` is empty, ``obs``
        is NaN, or ``null`` contains NaN.  Comparisons with NaN are always
        False, so without this guard a NaN observation would be reported as
        p = 0.0 and NaN null draws would bias the p-value downward.
    """
    null = np.asarray(null)
    if null.size == 0 or np.isnan(obs) or np.isnan(null).any():
        return np.nan
    return np.mean(null >= obs)

# One summary row per diagnostic: its observed alignment and the p-value
# that pval computes against that diagnostic's null column.
summary = [
    {
        "metric": name,
        "observed_alignment": observed[name],
        "p_value": pval(observed[name], df_null[name].values),
    }
    for name in keys
]

df_summary = pd.DataFrame(summary)
df_summary.to_csv("temporal_null_summary.csv", index=False)

# Report completion and the two files this script writes.
print(
    "✓ Optimized temporal phase randomization complete",
    "  - temporal_null_distributions.csv",
    "  - temporal_null_summary.csv",
    sep="\n",
)
