Comparing Two Climate Models¶
In this notebook, I will be comparing two climate reanalysis models:
- NCEP-DOE Reanalysis 2: Surface
- ERA5
I will be looking at the following variables:
- Surface Pressure
- Mean Sea Level Pressure
- Total Column Water
The idea is simple: these two models should have very similar properties. I will be trying to user RBIG in order to assess how similar these models are. I'll be looking at the following IT measures
- Entropy
- Total Correlation
- Mutual Information
If these climate models are that similar, then they should exhibit similar IT measures.
Data - Climate Models¶
import os, sys
cwd = os.getcwd()
source_path = f"{cwd}/../../"
sys.path.insert(0, f'{source_path}')
import numpy as np
# Import RBIG Helper
from src.models.train_models import run_rbig_models
# ESDC tools
sys.path.insert(0, f'/home/emmanuel/code/py_esdc')
from esdc.standardize import normalize_temporal
from esdc.transform import regrid_data
import cdsapi
from zipfile import ZipFile
import pandas as pd
import xarray as xr
from tqdm import tqdm
from sklearn import preprocessing
import seaborn as sns
import matplotlib.pyplot as plt
plt.style.use('ggplot')
%matplotlib inline
%load_ext autoreload
%autoreload 2
data_path = f"/home/emmanuel/projects/2020_rbig_rs/data/climate/raw/"
results_path = f"/home/emmanuel/projects/2020_rbig_rs/data/climate/results/"
fig_path = f"/home/emmanuel/projects/2020_rbig_rs/reports/figures/climate/"
ERA5¶
era5_data = xr.open_dataset(f"{data_path}ERA5.nc")
era5_data = era5_data.rename({'msl': 'mslp', 'latitude': 'lat', 'longitude': 'lon'})
# era5_data = era5_data.rename({'latitude': 'lat'})
# era5_data.attrs['model_id'] = 'era5'
# rescale model from 0.25 to 2.5 degrees
# era5_data = era5_data.coarsen(lat=10, lon=10, boundary='pad').mean()
era5_data.attrs['model_id'] = 'era5'
era5_data
NCAR-NCEP-DOE-II¶
ncep_data = xr.open_mfdataset(f"{data_path}*mon.mean.nc")
ncep_data = ncep_data.rename({'pres': 'sp'})
ncep_data.attrs['model_id'] = 'ncar_ncep_doe_2'
ncep_data
Regridding¶
era5_data_regrid = regrid_data(ncep_data, era5_data)
era5_data_regrid.attrs = era5_data.attrs
era5_data_regrid = xr.Dataset()
era5_data_regrid = xr.Dataset()
era5_data_regrid['sp'] = era5_regrid
era5_data_regrid.attrs = era5_data.attrs
era5_data_regrid
CMIP5¶
cmip5_data = xr.open_dataset(f"{data_path}CMIP5.nc")
cmip5_data = cmip5_data.rename({'psl': 'mslp'})
# rescale model from 0.25 to 2.5 degrees
# cmip5_data = cmip5_data.coarsen(lat=1, boundary='pad').mean()
cmip5_data.attrs['model_id'] = 'cmip5'
cmip5_data
cmip5_regrid = regrid_data(ncep_data.mslp, cmip5_data.mslp)
cmip5_data_regrid = xr.Dataset()
cmip5_data_regrid['mslp'] = cmip5_regrid
cmip5_data_regrid.attrs = cmip5_data.attrs
cmip5_data_regrid
Experiment I - Comparing Climate Models¶
Mean Sea Level Pressure¶
ERA5 vs NCEP¶
# Experiment class
class ClimateEntropy:
def __init__(self, save_path: None, variable: str='mslp', save_name=None, mi: bool=True):
self.variable = variable
self.results_path = save_path
self.results_df = pd.DataFrame()
self.save_name = save_name
self.mi = mi
def run_experiment(self, climate_model1: pd.DataFrame, climate_model2: pd.DataFrame) -> None:
"""Training loop that goes through each year and calculates the entropy,
total correlation and mutual information between the two models."""
time_length = len(climate_model1.groupby('time.year'))
# Normalize BEFORE the individual calculations
climate_model1[self.variable] = normalize_temporal(climate_model1[self.variable])
model1_id = climate_model1.attrs['model_id']
model2_id = climate_model2.attrs['model_id']
climate_model2[self.variable] = normalize_temporal(climate_model2[self.variable])
with tqdm(zip(
climate_model1.groupby('time.year'),
climate_model2.groupby('time.year')
), total=time_length) as progress_bar:
for imodel1, imodel2 in progress_bar:
# Update params in progress bar
# Transform to dataframe, remove spatial dimensions
X1 = self._get_time_features(imodel1[1][self.variable])
X2 = self._get_time_features(imodel2[1][self.variable])
# Normalize inputs
min_max_scaler = preprocessing.StandardScaler()
X1 = min_max_scaler.fit_transform(X1.values)
X2 = min_max_scaler.fit_transform(X2.values)
dims = X1.shape[1]
# =============================
# Calculate Mutual Information
# =============================
if self.mi == False:
mi_ = None
mi_t_ = None
else:
mi_, mi_t_ = run_rbig_models(X1, X2, measure='mi', verbose=None)
# Update params in progress bar
postfix = dict(
)
# ========================================
# Calculate Entropy and Total Correlation
# ========================================
# Model I
tc1_, h1_, h_t1_ = run_rbig_models(X1, measure='t', verbose=None)
self._update_results(
model=model1_id,
year=imodel1[0],
h_time=h_t1_,
tc=tc1_,
h=h1_,
mi=mi_,
mi_time=mi_t_,
dims=dims,
)
# Model II
tc2_, h2_, h_t2_ = run_rbig_models(X2, measure='t', verbose=None)
self._update_results(
model=model2_id,
year=imodel2[0],
h_time=h_t2_,
tc=tc2_,
h=h2_,
mi=mi_,
mi_time=mi_t_,
dims=dims,
)
# Update params in progress bar
postfix = dict(
year=imodel1[0],
mi=f"{mi_:.3f}" if self.mi is True else None,
h1=f"{h1_:.3f}",
tc1=f"{tc1_:.3f}",
h2=f"{h2_:.3f}",
tc2=f"{tc2_:.3f}",
)
progress_bar.set_postfix(postfix)
return None
def _get_time_features(self, data_df: pd.DataFrame)-> pd.DataFrame:
"""This function collapses the spatial dimensions as pivots. This allows
us to only consider time as the input feature."""
return data_df.to_dataframe().unstack(level=0).reset_index().drop(columns=['lat', 'lon']).dropna()
def _update_results(self, model, year, tc, h, h_time, mi, mi_time, dims):
"""appends new values to the results dataframe."""
self.results_df = self.results_df.append({
'model': model,
'year': year,
'tc': tc,
'h': h,
'h_time': h_time,
'mi': mi,
'mi_time': mi_time,
'dims': dims,
}, ignore_index=True
)
if self.results_path is not None:
self._save_results()
return self
def _save_results(self):
"""Saves the dataframe to the assigned results path."""
self.results_df.to_csv(f"{self.results_path}{self.variable}_{self.save_name}.csv")
return None
# Initialize experiment
short_decade_exp = ClimateEntropy(
save_path=f"{results_path}",
variable='mslp',
save_name='era_ncep'
)
# run experiment (shorter decade)
short_decade_exp.run_experiment(era5_data_regrid, ncep_data)
# extract results
results_df = short_decade_exp.results_df
ERA5 vs CMIP5¶
2006-01-16, 2025-12-16, 1979-01-01, 2019-07-01
# Initialize experiment
short_decade_exp = ClimateEntropy(
save_path=f"{results_path}", variable='mslp', save_name='era_cmip',
mi=True
)
# run experiment (shorter decade)
short_decade_exp.run_experiment(
era5_data_regrid.sel(time=slice('2006-01-16', '2019-07-01')),
cmip5_data_regrid.sel(time=slice('2006-01-16', '2019-07-01'))
)
NCEP vs CMIP5¶
# Initialize experiment
short_decade_exp = ClimateEntropy(
save_path=f"{results_path}", variable='mslp', save_name='ncep_cmip',
mi=True
)
# run experiment (shorter decade)
short_decade_exp.run_experiment(
ncep_data.sel(time=slice('2006-01-16', '2019-07-01')),
cmip5_data_regrid.sel(time=slice('2006-01-16', '2019-07-01'))
)