import xarray
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.axes import Axes
import pandas as pd
= ["01", "05", "06", "07", "08"]
RUN_VARS = ["tasmax", "tasmin", "pr"]
VARS
def get_root_filepath():
return "https://climrecal.blob.core.windows.net/analysis/cpm-median-time-series"
def add_cols(df):
"day"] = df["time"].apply(lambda x: x.timetuple().tm_yday)
df["month"] = df["time"].apply(lambda x: x.month)
df["year"] = df["time"].apply(lambda x: x.year)
df["day_of_month"] = df["time"].apply(lambda x: x.day)
df["leap_year"] = df["time"].apply(lambda x: x.year % 4 == 0)
df[return df
def get_days(is_leap_year: bool) -> np.array:
if not is_leap_year:
# https://docs.xarray.dev/en/stable/generated/xarray.Dataset.convert_calendar.html
# February 6th (36), April 19th (109), July 2nd (183), September 12th (255), November 25th (329).
# First missing day should be 37, not 36 since February 6th is 37
return np.array([37, 109, 183, 255, 329])
else:
# January 31st (31), March 31st (91), June 1st (153), July 31st (213), September 31st (275) and November 30th (335).
return np.array([31, 91, 153, 213, 275, 335])
def get_data() -> dict[str, dict[str, tuple[pd.DataFrame, pd.DataFrame]]]:
= {}
data for var in VARS:
= {}
data[var] for run_var in RUN_VARS:
= f"{get_root_filepath()}/cpm-raw-medians/median-{var}-{run_var}.nc#mode=bytes"
path_raw = f"{get_root_filepath()}/cpm-converted-nearest-medians/median-{var}-{run_var}.nc#mode=bytes"
path_con = xarray.load_dataset(path_raw)
x_raw = xarray.load_dataset(path_con)
x_con = add_cols(
df_raw "standard", align_on="year")
x_raw.convert_calendar(
.to_pandas()
.reset_index()
)= add_cols(x_con.to_pandas().reset_index())
df_con = (df_raw, df_con)
data[var][run_var] return data
# Load all data
= get_data() data
Warning 3: Cannot find header.dxf (GDAL_DATA is not defined)
# Example
= "tasmax", "01"
var, run_var = data[var][run_var] df_raw, df_con
# Missing Day 37
64:70] df_raw.iloc[
time | tasmax | day | month | year | day_of_month | leap_year | |
---|---|---|---|---|---|---|---|
64 | 1981-02-04 12:00:00 | 7.901025 | 35 | 2 | 1981 | 4 | False |
65 | 1981-02-05 12:00:00 | 9.846338 | 36 | 2 | 1981 | 5 | False |
66 | 1981-02-07 12:00:00 | 10.244776 | 38 | 2 | 1981 | 7 | False |
67 | 1981-02-08 12:00:00 | 7.123193 | 39 | 2 | 1981 | 8 | False |
68 | 1981-02-09 12:00:00 | 7.830713 | 40 | 2 | 1981 | 9 | False |
69 | 1981-02-10 12:00:00 | 7.916040 | 41 | 2 | 1981 | 10 | False |
def plot_by_day(
data,str,
var: str,
run_var: int,
year: bool,
leap_year: bool,
plot_diff:
ax: Axes,=0.8,
lw
):= data[var][run_var]
df_raw, df_con if not plot_diff:
for i, df in enumerate([df_raw, df_con]):
if leap_year is not None:
= df[df["leap_year"].eq(leap_year)]
df2 else:
= df
df2 if year is None:
= df2.groupby("day")[var].median()
x else:
= df2[df2["year"].eq(year)].groupby("day")[var].first()
x if i == 0:
if leap_year is not None:
ax.vlines(
get_days(leap_year),min(),
x.max(),
x.=-1,
zorder=0.4,
lw="k",
color=":",
ls
)=lw, alpha=0.8)
ax.plot(x.index, x, lwelse:
=lw, alpha=0.8)
ax.plot(x.index, x, lw# pass
else:
= []
series for i, df in enumerate([df_raw, df_con]):
if leap_year is not None:
= df[df["leap_year"].eq(leap_year)]
df2 else:
= df
df2 if year is None:
= df2.groupby("day")[var].median()
x else:
= df2[df2["year"].eq(year)].groupby("day")[var].first()
x
series.append(x)
# Proportion difference
# x2 = (series[1] - series[0]) / series[0]
# Use absolute difference
= series[1] - series[0]
x2
ax.vlines(
get_days(leap_year),min(),
x2.max(),
x2.=-1,
zorder=0.4,
lw="k",
color=":",
ls
)=lw, alpha=0.8)
ax.plot(x2.index, x2, lw
f"Year: {year}, Leap year: {leap_year}")
ax.set_title(if not plot_diff:
f"Absolute difference ({var})")
ax.set_ylabel(else:
f"Value ({var})") ax.set_ylabel(
def plot_array(data, year, leap_year=True, plot_diff=False, lw=0.8):
= ["01", "05", "06", "07", "08"]
run_vars vars = ["tasmax", "tasmin", "pr"]
= plt.subplots(
fig, axs len(vars),
len(run_vars),
=False,
squeeze=True,
sharex="row",
sharey=(14, 6),
figsize
)for row, var in enumerate(vars):
for col, run_var in enumerate(run_vars):
= axs[row][col]
ax =ax, lw=lw)
plot_by_day(data, var, run_var, year, leap_year, plot_diff, axif col == 0:
="medium")
ax.set_ylabel(var, fontsizeelse:
"")
ax.set_ylabel(if row == 0:
f"Run: {run_var}", fontsize="medium")
ax.set_title(else:
"") ax.set_title(
"tasmax", "08", None, leap_year=True, plot_diff=False, ax=plt.gca()) plot_by_day(data,
"tasmax", "08", None, leap_year=True, plot_diff=True, ax=plt.gca()) plot_by_day(data,
"tasmax", "08", None, leap_year=False, plot_diff=True, ax=plt.gca()) plot_by_day(data,
# Plot arrays
for year in [None, 1984]:
for leap_year in [True, False]:
for plot_diff in [False, True]:
if year is not None:
= year if leap_year else 1983
actual_year else:
= None
actual_year print(
f"Plot diff: {plot_diff}; year: {actual_year}; leap_year: {leap_year}"
)# Leap years, medians
=actual_year, leap_year=leap_year, plot_diff=plot_diff)
plot_array(data, year plt.show()
Plot diff: False; year: None; leap_year: True
Plot diff: True; year: None; leap_year: True
Plot diff: False; year: None; leap_year: False
Plot diff: True; year: None; leap_year: False
Plot diff: False; year: 1984; leap_year: True
Plot diff: True; year: 1984; leap_year: True
Plot diff: False; year: 1983; leap_year: False
Plot diff: True; year: 1983; leap_year: False
# Explore location of spike in difference in var="tasmax", run_var="08"
plot_by_day(
data,="tasmax",
var="08",
run_var=1983,
year=False,
leap_year=False,
plot_diff=plt.gca(),
ax
)# Subset days to region where spike is
320, 340) plt.xlim(
plot_by_day(
data,="tasmax",
var="08",
run_var=1983,
year=False,
leap_year=True,
plot_diff=plt.gca(),
ax
)320, 340) plt.xlim(
# Explore location of spike in difference in var="tasmax", run_var="08"
plot_by_day(
data,="tasmax",
var="08",
run_var=1984,
year=True,
leap_year=False,
plot_diff=plt.gca(),
ax
)# Subset days to region where spike is
320, 340) plt.xlim(
# Explore location of spike in difference in var="tasmax", run_var="08"
plot_by_day(
data,="tasmax",
var="08",
run_var=1984,
year=True,
leap_year=True,
plot_diff=plt.gca(),
ax
)# Subset days to region where spike is
320, 340) plt.xlim(