#!/usr/bin/env python3
"""
Combined diagnostic:
1) Site-based sampling of ΔZeff/Δt and stability metrics
2) Model-derived global event-rate envelope

Inputs:
- dzdt_event_guided.nc
- dzdt_continuous.nc (loaded, but not used in any output of this script)
- zerocontour_velocity_diagnostics.nc (listed here, but never opened by this script)
- tpw_return_sequence_event_guided_highcadence.nc
- early_homo_sites.geojson
- early_civilizations.geojson

Outputs:
- site_sampling_timeseries.csv
- site_sampling_summary.csv
- model_event_rate_envelope.csv
"""

import json
import numpy as np
import xarray as xr
import pandas as pd
from scipy.spatial import cKDTree
from skimage import measure

# ============================================================
# LOAD DATA
# ============================================================

# Every dataset is opened in a context manager so the underlying file handle
# is released as soon as the arrays have been pulled into memory; ``.values``
# materialises the data as NumPy arrays before the file is closed.
with xr.open_dataset("dzdt_event_guided.nc") as ds_evt:
    dz_evt = ds_evt["dzdt_event_guided"].values

# NOTE(review): dz_con is not referenced anywhere else in the visible script;
# it is still loaded so that the module keeps exposing the same names.
with xr.open_dataset("dzdt_continuous.nc") as ds_con:
    dz_con = ds_con["dzdt_continuous"].values

with xr.open_dataset("tpw_return_sequence_event_guided_highcadence.nc") as base:
    Z = base["effective_elevation"].values
    lat = base["lat"].values
    lon = base["lon"].values
    time_ka = base["time_ka"].values

# 2-D coordinate grids with latitude along the first axis (indexing="ij"),
# i.e. both have shape (len(lat), len(lon)).
lat2d, lon2d = np.meshgrid(lat, lon, indexing="ij")

# ============================================================
# LOAD SITE DATA
# ============================================================

def load_sites(fname):
    """Read site coordinates from a GeoJSON feature collection.

    Parameters
    ----------
    fname : str or path-like
        Path to a GeoJSON file with a top-level ``"features"`` list.
        Each feature is read as ``feature["geometry"]["coordinates"]``;
        presumably every feature is a Point geometry -- TODO confirm.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_features, 2)`` whose rows are
        ``(lat, lon)``. An empty collection yields shape ``(0, 2)``.

    Raises
    ------
    KeyError
        If the file has no ``"features"`` key, or a feature lacks
        ``"geometry"`` / ``"coordinates"``.
    """
    # GeoJSON is UTF-8 by specification; do not depend on the platform's
    # default text encoding.
    with open(fname, encoding="utf-8") as f:
        g = json.load(f)

    # GeoJSON positions are ordered (lon, lat[, elevation]); swap them so the
    # rows come out as (lat, lon) and drop any third component.
    coords = [
        (feat["geometry"]["coordinates"][1], feat["geometry"]["coordinates"][0])
        for feat in g["features"]
    ]

    # reshape(-1, 2) keeps the array two-dimensional when there are no features.
    return np.array(coords, dtype=float).reshape(-1, 2)

# Site coordinates for the two groups analysed below.
homo_sites = load_sites("early_homo_sites.geojson")
civ_sites = load_sites("early_civilizations.geojson")

# Nearest-node lookup structure. Row k of grid_points holds the (lat, lon) of
# the k-th grid node in row-major order, so an index returned by the tree can
# be used directly on any field flattened with .ravel().
grid_points = np.stack([lat2d, lon2d], axis=-1).reshape(-1, 2)
tree = cKDTree(grid_points)

def sample_field(field, sites):
    """Sample a gridded field at the grid node nearest to each site.

    Parameters
    ----------
    field : numpy.ndarray
        Field on the same grid the module-level ``tree`` was built from;
        it is flattened with ``ravel()`` before indexing.
    sites : array-like
        ``(lat, lon)`` pairs, one row per site.

    Returns
    -------
    numpy.ndarray
        One field value per site, in site order. An empty ``sites`` input
        yields an empty array.

    Notes
    -----
    "Nearest" is whatever the module-level ``tree`` reports: plain Euclidean
    distance in (lat, lon) coordinate space. No longitude wrap-around is
    applied here.
    """
    pts = np.asarray(sites, dtype=float).reshape(-1, 2)
    if pts.shape[0] == 0:
        return np.array([])

    # A single batched query replaces one Python-level query per site.
    _, idx = tree.query(pts)
    return np.asarray(field).ravel()[idx]

# ============================================================
# ZERO-CONTOUR DISTANCE FUNCTION
# ============================================================

def zero_contour_distance(z, sites):
    """Distance from each site to the nearest vertex of the ``z == 0`` contour.

    Parameters
    ----------
    z : numpy.ndarray
        2-D field whose zero level set is traced. Its first axis is
        interpolated against the module-level ``lat`` and its second axis
        against the module-level ``lon``.
    sites : array-like
        ``(lat, lon)`` pairs, one row per site.

    Returns
    -------
    numpy.ndarray
        One distance per site, measured as Euclidean distance in (lat, lon)
        coordinate space to the closest contour vertex. All-NaN when ``z``
        has no zero contour; empty when ``sites`` is empty.
    """
    contours = measure.find_contours(z, 0.0)
    if not contours:
        return np.full(len(sites), np.nan)

    # Contour vertices come back as fractional (row, column) positions;
    # interpolate them onto the coordinate axes. The index axes are the same
    # for every contour, so build them once.
    row_axis = np.arange(len(lat))
    col_axis = np.arange(len(lon))
    pts = np.vstack([
        np.column_stack([
            np.interp(c[:, 0], row_axis, lat),
            np.interp(c[:, 1], col_axis, lon),
        ])
        for c in contours
    ])

    query = np.asarray(sites, dtype=float).reshape(-1, 2)
    if query.shape[0] == 0:
        return np.array([])

    # One batched nearest-neighbour query instead of a Python loop over sites.
    dists, _ = cKDTree(pts).query(query)
    return dists

# ============================================================
# SITE-BASED TIME SERIES
# ============================================================

site_groups = [("Homo", homo_sites), ("Civilization", civ_sites)]

# Both per-site diagnostics are computed independently for every site, so the
# groups are stacked into one array. Each time step then needs a single grid
# lookup and a single zero-contour extraction instead of one per group; the
# per-group statistics are taken from slices of the combined result.
stacked_sites = [
    np.asarray(group_sites, dtype=float).reshape(-1, 2)
    for _, group_sites in site_groups
]
all_sites = np.vstack(stacked_sites)
edges = np.cumsum([0] + [len(group_sites) for group_sites in stacked_sites])

records = []

for i in range(dz_evt.shape[0]):
    dz_all = np.abs(sample_field(dz_evt[i], all_sites))
    zdist_all = zero_contour_distance(Z[i], all_sites)

    for (label, _), start, stop in zip(site_groups, edges[:-1], edges[1:]):
        dz_vals = dz_all[start:stop]
        zdist = zdist_all[start:stop]

        records.append({
            "time_ka": time_ka[i],
            "group": label,
            "mean_abs_dzdt": np.nanmean(dz_vals),
            "median_abs_dzdt": np.nanmedian(dz_vals),
            "mean_zero_contour_distance_deg": np.nanmean(zdist),
            "var_zero_contour_distance_deg2": np.nanvar(zdist)
        })

df_sites = pd.DataFrame(records)
df_sites.to_csv("site_sampling_timeseries.csv", index=False)

# Summary per group: the time-average of every remaining column of the
# per-step table (this includes the time_ka column itself).
df_sites.groupby("group").mean().to_csv(
    "site_sampling_summary.csv"
)

# ============================================================
# MODEL-DERIVED EVENT-RATE ENVELOPE
# ============================================================

# Global mean |ΔZeff/Δt| as event-rate proxy: one value per time step,
# averaged over both spatial axes with NaN cells ignored.
event_rate = np.nanmean(np.abs(dz_evt), axis=(1, 2))

# Normalize so the peak equals 1. When the peak is not positive (an all-zero
# or all-NaN series) there is nothing to scale by, so the raw values are kept
# rather than producing NaN from 0/0.
peak = np.nanmax(event_rate)
if peak > 0:
    event_rate_norm = event_rate / peak
else:
    event_rate_norm = event_rate.copy()

# Pair each rate with the time value at the same index, matching how the
# site-sampling loop indexes time_ka. For a derivative with one step fewer
# than time_ka this equals time_ka[:-1]; it also works when the derivative
# has the same length as time_ka, where a fixed [:-1] slice would make the
# DataFrame columns unequal in length.
df_event = pd.DataFrame({
    "time_ka": time_ka[:len(event_rate)],
    "event_rate_proxy": event_rate_norm
})

df_event.to_csv("model_event_rate_envelope.csv", index=False)

print("✓ Combined site sampling and event-rate envelope complete")
print("  - site_sampling_timeseries.csv")
print("  - site_sampling_summary.csv")
print("  - model_event_rate_envelope.csv")
