#!/usr/bin/env python3

import json
import numpy as np
import matplotlib.pyplot as plt

from scipy.stats import spearmanr, linregress

# ------------------------------------------------------------
# Load distance-to-contour GeoJSON
# ------------------------------------------------------------

# GeoJSON is UTF-8 by specification, so decode it explicitly rather than
# relying on the platform's default text encoding.
with open(
    "early_homo_distance_to_zero_contour.geojson", "r", encoding="utf-8"
) as f:
    data = json.load(f)

# Parallel lists: entry i of each list describes the same retained feature.
ages = []
distances = []
sides = []

for feat in data["features"]:
    # A GeoJSON Feature may carry `"properties": null`; treat it as empty so
    # the feature is skipped below instead of crashing on `.get`.
    props = feat.get("properties") or {}

    min_ma = props.get("min_ma")
    max_ma = props.get("max_ma")
    dist = props.get("distance_to_zero_contour_km")

    if min_ma is None or max_ma is None or dist is None:
        continue

    try:
        min_ma = float(min_ma)
        max_ma = float(max_ma)
        dist = float(dist)
    except (TypeError, ValueError):
        # ValueError: unparseable string; TypeError: non-scalar value such
        # as a list or dict. Either way the feature is unusable.
        continue

    # NaN / infinity would silently turn the correlation and the fit into
    # NaN, so drop them like any other unusable value.
    if not (np.isfinite(min_ma) and np.isfinite(max_ma) and np.isfinite(dist)):
        continue

    # Representative age (midpoint of the min/max age range)
    age = 0.5 * (min_ma + max_ma)

    ages.append(age)
    distances.append(dist)
    sides.append(props.get("side_of_contour"))

ages = np.array(ages)
distances = np.array(distances)

# ------------------------------------------------------------
# Correlation analysis
# ------------------------------------------------------------

# Spearman correlation between site age and distance to the contour;
# `rho` and `pval` are reused later in the figure title.
rho, pval = spearmanr(ages, distances)

for report_line in (
    f"Spearman rho = {rho:.3f}",
    f"p-value      = {pval:.3e}",
):
    print(report_line)

# ------------------------------------------------------------
# Linear regression (for visual trend only)
# ------------------------------------------------------------

# A least-squares line needs at least two sites with distinct ages. Because
# the fit is purely decorative, degrade to an empty trend line instead of
# aborting the script before the figure is written. (The comparison is also
# False when the ages contain NaN, which would make the fit meaningless.)
if ages.size >= 2 and ages.max() > ages.min():
    slope, intercept, r, p, stderr = linregress(ages, distances)

    # Evaluate the fitted line on an even grid spanning the observed ages.
    xfit = np.linspace(ages.min(), ages.max(), 200)
    yfit = intercept + slope * xfit
else:
    print(
        "Warning: fewer than two sites with distinct ages; "
        "skipping linear trend."
    )
    slope = intercept = r = p = stderr = float("nan")
    xfit = np.empty(0)
    yfit = np.empty(0)

# ------------------------------------------------------------
# Plot
# ------------------------------------------------------------

plt.figure(figsize=(8, 6))

# Colour by side of contour. Each group is drawn with its own scatter call so
# that the legend explains the colours; a single call with a per-point colour
# list would yield only one legend handle, in the first point's colour.
is_emergent = np.array([s == "emergent" for s in sides], dtype=bool)

for group_mask, group_color, group_label in (
    (is_emergent, "tab:blue", "Homo sites (emergent)"),
    # Everything that is not tagged "emergent" (including a missing tag).
    (~is_emergent, "tab:red", "Homo sites (not emergent)"),
):
    if not group_mask.any():
        # Avoid a legend entry for a group with no points.
        continue

    plt.scatter(
        ages[group_mask],
        distances[group_mask],
        c=group_color,
        alpha=0.75,
        edgecolor="black",
        linewidth=0.4,
        s=40,
        label=group_label,
    )

plt.plot(
    xfit,
    yfit,
    color="black",
    linewidth=2,
    label="Linear trend",
)

plt.xlabel("Site age (Ma before present)")
plt.ylabel("Distance to equilibrium margin (km)")

plt.title(
    "Distance of Homo Sites from Equilibrium Sea-Level Margin\n"
    f"Spearman ρ = {rho:.2f},  p = {pval:.1e}"
)

plt.grid(alpha=0.3)
plt.legend(frameon=True)

# Older ages to the left (geological convention)
plt.gca().invert_xaxis()

plt.tight_layout()

# Save before show(): the figure is written to disk regardless of how the
# interactive window is later closed.
plt.savefig(
    "homo_age_vs_distance_to_zero_contour.png",
    dpi=300,
)
plt.show()
