#!/usr/bin/env python3
"""
Permutation tests comparing Homo and Civilization sites
against randomized land-point samples.

Inputs:
- stability_gradient.nc
- homo_site_metrics.csv
- civilization_site_metrics.csv

Outputs:
- permutation_test_results.csv
"""

import numpy as np
import pandas as pd
import xarray as xr
from scipy.interpolate import RegularGridInterpolator
from scipy.stats import mannwhitneyu

# ============================================================
# CONFIGURATION
# ============================================================

STABILITY_NC = "stability_gradient.nc"
HOMO_CSV = "homo_site_metrics.csv"
CIV_CSV = "civilization_site_metrics.csv"
OUTPUT_CSV = "permutation_test_results.csv"

GRADIENT_COLUMN = "stability_gradient"   # column read from both site CSVs

N_RANDOM = 10000     # size of the random land-cell pool drawn per permutation
N_PERM = 5000        # number of permutations (robust but feasible)
RNG_SEED = 42        # fixed seed so reruns reproduce the same p-values


# ============================================================
# HELPERS
# ============================================================

def load_site_gradients(csv_path, column=GRADIENT_COLUMN):
    """Return the non-missing values of *column* from a site-metrics CSV.

    Only rows where *column* itself is missing are discarded; a NaN in some
    other, unused column must not silently shrink the sample.

    Args:
        csv_path: Path or file-like object accepted by ``pandas.read_csv``.
        column: Name of the column holding the per-site gradient.

    Returns:
        1-D float array of the site values.

    Raises:
        KeyError: If *column* is not present in the CSV.
        ValueError: If no usable values remain.
    """
    table = pd.read_csv(csv_path)
    values = np.asarray(table[column].dropna(), dtype=float)
    if values.size == 0:
        raise ValueError(f"no non-missing '{column}' values in {csv_path!r}")
    return values


def land_gradient_values(gradient):
    """Return the finite cells of the gradient grid as a 1-D array.

    "Land" is defined purely as "the stability gradient is finite"; no other
    mask is applied.

    Raises:
        ValueError: If the grid contains no finite cell.
    """
    gradient = np.asarray(gradient, dtype=float)
    values = gradient[np.isfinite(gradient)]
    if values.size == 0:
        raise ValueError("stability gradient grid has no finite (land) cells")
    return values


def null_medians(land_values, sample_sizes, n_perm=N_PERM, n_random=N_RANDOM,
                 rng=None):
    """Build null distributions of the median for each requested sample size.

    For every permutation a pool of land cells is drawn without replacement,
    and the median of its first ``k`` entries is recorded for each ``k`` in
    *sample_sizes*.  ``Generator.choice`` shuffles its output by default, so
    each prefix is itself a uniform random subset.

    The cell values are read straight from the grid rather than through a
    linear interpolator: the sampled points are exact grid nodes, and linear
    interpolation returns NaN for any node that has a non-finite neighbour
    (``0 * NaN`` is NaN), which would turn the medians into NaN.

    NOTE(review): cells are sampled uniformly by grid index.  If the grid is
    regular in lat/lon this over-represents high latitudes relative to land
    area; consider area (cos-latitude) weights after confirming the grid type.

    Args:
        land_values: 1-D array of finite grid-cell values.
        sample_sizes: Iterable of positive sample sizes (one per site group).
        n_perm: Number of permutations.
        n_random: Pool size per permutation (capped at the number of cells).
        rng: ``numpy.random.Generator``; a generator seeded with ``RNG_SEED``
            is created when omitted.

    Returns:
        Float array of shape ``(len(sample_sizes), n_perm)``.

    Raises:
        ValueError: If a sample size is < 1 or exceeds the pool size.
    """
    land_values = np.asarray(land_values, dtype=float)
    sizes = [int(size) for size in sample_sizes]
    if rng is None:
        rng = np.random.default_rng(RNG_SEED)

    pool_size = min(int(n_random), land_values.size)
    for size in sizes:
        if size < 1:
            raise ValueError("sample sizes must be at least 1")
        if size > pool_size:
            # Slicing the pool would silently compare against a smaller sample.
            raise ValueError(
                f"sample size {size} exceeds the random pool size {pool_size}"
            )

    medians = np.empty((len(sizes), n_perm))
    for perm in range(n_perm):
        picked = rng.choice(land_values.size, size=pool_size, replace=False)
        pool = land_values[picked]
        for row, size in enumerate(sizes):
            medians[row, perm] = np.median(pool[:size])
    return medians


def empirical_p_value(null_stats, observed):
    """Return the lower-tail permutation p-value of *observed*.

    Uses ``(1 + #{null <= observed}) / (1 + N)`` so the estimate can never be
    exactly zero with a finite number of permutations.

    Raises:
        ValueError: If *null_stats* is empty, or if it or *observed* contains
            NaN (NaN comparisons would silently count as "not extreme").
    """
    null_stats = np.asarray(null_stats, dtype=float)
    if null_stats.size == 0:
        raise ValueError("null distribution is empty")
    if np.isnan(null_stats).any() or np.isnan(observed):
        raise ValueError("NaN encountered in permutation statistics")
    hits = int(np.sum(null_stats <= observed))
    return float((1 + hits) / (1 + null_stats.size))


# ============================================================
# MAIN ANALYSIS
# ============================================================

def main():
    """Run the permutation tests, write the results CSV and print a summary.

    Returns:
        The results ``DataFrame`` that was written to ``OUTPUT_CSV``.
    """
    # --- Load data ------------------------------------------------------
    # `.values` pulls the array into memory, so the file handle can be
    # released as soon as the block exits.
    with xr.open_dataset(STABILITY_NC) as ds:
        gradient = ds["stability_gradient"].values

    land_values = land_gradient_values(gradient)
    homo_G = load_site_gradients(HOMO_CSV)
    civ_G = load_site_gradients(CIV_CSV)

    # --- Observed statistics -------------------------------------------
    obs_homo_median = np.median(homo_G)
    obs_civ_median = np.median(civ_G)

    # --- Permutation test ----------------------------------------------
    rng = np.random.default_rng(RNG_SEED)
    perm_homo_medians, perm_civ_medians = null_medians(
        land_values,
        (len(homo_G), len(civ_G)),
        n_perm=N_PERM,
        n_random=N_RANDOM,
        rng=rng,
    )

    # --- Empirical p-values (lower tail) -------------------------------
    # A small p means the observed median is lower than most random medians;
    # the printed interpretation treats a lower gradient as "more stable".
    p_homo = empirical_p_value(perm_homo_medians, obs_homo_median)
    p_civ = empirical_p_value(perm_civ_medians, obs_civ_median)

    # --- Output results -------------------------------------------------
    results = pd.DataFrame({
        "group": ["Early Homo", "Early Civilizations"],
        "observed_median_gradient": [obs_homo_median, obs_civ_median],
        "permutation_p_value": [p_homo, p_civ],
    })
    results.to_csv(OUTPUT_CSV, index=False)

    print("\n=== PERMUTATION TEST RESULTS ===\n")
    print(results.to_string(index=False))

    print("\nInterpretation:")
    print("• p < 0.05 → sites occupy significantly more stable regions than random land points")
    print("• p ≈ 0.5 → indistinguishable from random occupation")
    print("• p → 0   → strong stability selection")

    # --- Optional: direct Homo vs Civilization comparison --------------
    # This compares the two site groups with each other, not with the
    # random land-point null.
    _, u_p = mannwhitneyu(homo_G, civ_G, alternative="two-sided")

    print("\nHomo vs Civilization (direct comparison):")
    print(f"Mann–Whitney U p-value = {u_p:.4g}")

    return results


if __name__ == "__main__":
    main()
