Visually Comparing Climate Models¶
Summary¶
In this notebook, I will be comparing three climate reanalysis models:
- NCEP-DOE Reanalysis 2: Surface
- ERA5
- CMIP5
I will be looking at the following variables:
- Mean Sea Level Pressure (CMIP5, ERA5, NCEP)
- Surface Pressure (ERA5, NCEP)
I will be trying to user RBIG in order to assess how similar these models are. I'll be looking at the following IT measures. If these climate models are that similar, then they should exhibit similar IT measures.
Preprocessing Steps¶
Regridded Spatially
- The ERA5 had the coarsest spatial resolution (2.5 x 2.5).
- I regridded the NCEP from (0.25 x 0.25) to (2.5 x 2.5).
- I regridded the CMIP5 from (2 x 2.5) to (2.5 x 2.5).
Temporal Resolution
- ERA5 and NCEP go from 1980-2019
- CMIP5 goes from 2006-2018
- For comparing ERA5 vs CMIP5 and NCEP vs CMIPF, I found the same time components
Measures¶
I'm measuring the following:
- Entropy - expected uncertainty
- Total Correlation - amount of redundant information between features
- Mutual Information - amount of information shared between variables
Data¶
Inputs
I'm taking each year as is. Each spatial location is a sample and each year is a feature. My inputs are:
X \in \mathbb{R}^{\text{spatial } \times \text{ month}}
Outputs
All my information theory measures are in nats. They are scalars.
Hypothesis¶
Simple: The ERA5 and the NCEP model should be more similar than the CMIP5 model compared to each of them.
Data - Climate Models¶
import os, sys
cwd = os.getcwd()
source_path = f"{cwd}/../../../"
sys.path.insert(0, f'{source_path}')
# ESDC tools
sys.path.insert(0, f'/home/emmanuel/code/py_esdc')
# from esdc.preprocessing import normalize_temporal
import cdsapi
from zipfile import ZipFile
import pandas as pd
import xarray as xr
from tqdm import tqdm
from sklearn import preprocessing
# Visualization Tools
from src.data.climate.loader import ResultsLoader
from src.visualization.climate.global_it import plot_global_entropy, plot_global_diff_entropy, plot_global_mutual_info, plot_global_diff_mutual_info
# from src.visualization.climate import PlotResults
import seaborn as sns
import matplotlib.pyplot as plt
# plt.style.use('ggplot')
plt.style.use(['seaborn-notebook', 'fivethirtyeight'])
%matplotlib inline
%load_ext autoreload
%autoreload 2
data_path = f"/home/emmanuel/projects/2020_rbig_rs/data/climate/raw/"
results_path = f"/home/emmanuel/projects/2020_rbig_rs/data/climate/results/amip/"
fig_path = f"/home/emmanuel/projects/2020_rbig_rs/reports/figures/climate/"
Experiment I - Global Entropy¶
from pathlib import Path
data_path = f"/home/emmanuel/projects/projects/2020_rbig_rs/data/climate/results/amip/local/compare/"
def get_results_files(base_model: str, trials: bool)-> pd.DataFrame:
path = Path(data_path)
if base_model == 'ncep':
base_pattern = 'ncep'
elif base_model == 'era5':
base_pattern = 'era5'
else:
raise ValueError('Unrecognized base model:', base_model)
trials_ext = 'tr100_v1'
# if trials == True:
# trials_ext = 'v1'
# elif trials == False:
# trials_ext = 'v2'
# else:
# raise ValueError("Unrecognized trials extentions:", trials)
filename_pattern = base_pattern + '*' + trials_ext + '.csv'
df_from_each_file = [pd.read_csv(f, index_col=0) for f in path.rglob(filename_pattern)]
results_df = pd.concat(df_from_each_file, ignore_index=True)
return results_df
def post_processing_compare(df: pd.DataFrame)-> pd.DataFrame:
# divide by the spatial resolution
df['mi'] = df['mi'] / (df['spatial'] ** 2)
return df
df = pd.read_csv('/home/emmanuel/projects/2020_rbig_rs/data/climate/interim/amip/global/individual/ncep_bnu_esm_tr100_v1.csv')
df.head()
data_path = f"/home/emmanuel/projects/2020_rbig_rs/data/climate/interim/amip/global/individual/"
results_df = get_results_files('era5', False)
results_df.head()
results_df.cmip.unique().tolist()
SAVEPATH = '/home/emmanuel/projects/2020_rbig_rs/reports/figures/climate/amip/global/entropy/'
def plot_global_entropy(results_df: pd.DataFrame, base: str, cmip: str, normalized=True, log_mi=True, save=True)-> None:
if normalized == True:
results_df['h_cmip'] = results_df['h_cmip'] / results_df['spatial']**2
results_df['h_base'] = results_df['h_base'] / results_df['spatial']**2
results_df = results_df[results_df['base'] == base]
results_df = results_df[results_df['cmip'] == cmip]
fig, ax = plt.subplots()
sns.scatterplot(data=results_df, x='spatial', y='h_base', label=f"{base}", ax=ax)
sns.scatterplot(data=results_df, x='spatial', y='h_cmip', label=f"{cmip}", ax=ax)
ax.set_title(f'{base.upper()} vs CMIP: {cmip.upper()}')
ax.set_xlabel('Spatial Features')
# ax.set_xlim([2, 6])
ax.set_ylabel('Entropy, H')
ax.legend()
if save:
savename = f"global_h_{base}_{cmip}.png"
fig.savefig(SAVEPATH + savename, )
else:
plt.show()
return None
Example¶
results_df.cmip.unique().tolist()
results_df = get_results_files('era5', False)
plot_global_entropy(results_df, 'era5', 'ipsl_cm5a_lr', normalized=True, save=True)
for imodel in results_df.cmip.unique().tolist():
results_df = get_results_files('era5', False)
plot_global_entropy(results_df, 'era5', imodel, normalized=True, save=True)
# plot_global_entropy(results, 'ncep', 'inmcm4', normalized=False)
for imodel in results_df.cmip.unique().tolist():
results_df = get_results_files('ncep', False)
plot_global_entropy(results_df, 'ncep', imodel, normalized=True,)
Difference in Entropy¶
import numpy as np
def plot_global_diff_entropy(results_df: pd.DataFrame, base: str, normalized=True, log_mi=False)-> None:
results_copy = results_df[results_df['base'] == base]
if normalized == True:
results_copy['h_base'] = results_copy['h_base'] / results_copy['spatial']**2
results_copy['h_cmip'] = results_copy['h_cmip'] / results_copy['spatial']**2
results_copy = results_df[results_df['base'] == base]
# calculate difference
results_copy['h_diff'] = np.abs(results_copy['h_cmip'] - results_copy['h_base'])
if log_mi == True:
results_copy['h_diff'] = np.log(results_copy['h_diff'])
fig, ax = plt.subplots()
# sns.scatterplot(ax=ax, data=results_copy, x='spatial', y='h_diff', hue='base', color='black')
sns.lineplot(ax=ax, data=results_copy, x='spatial', y='h_diff', hue='cmip')
plt.title(f'')
plt.xlabel('Spatial Features')
plt.ylabel('Entropy, H')
plt.legend(ncol=2, bbox_to_anchor=(2.05, 1), fontsize=16)
# plt.tight_layout()
plt.show()
results_df = get_results_files('era5', False)
plot_global_diff_entropy(results_df, 'era5', normalized=True, save=True)
results_df = get_results_files('ncep', False)
plot_global_diff_entropy(results_df, 'ncep', normalized=True, save=True)
Mutual Information¶
from pathlib import Path
data_path = f"/home/emmanuel/projects/projects/2020_rbig_rs/data/climate/results/amip/global/compare/"
def get_results_files(base_model: str, trials: bool)-> pd.DataFrame:
path = Path(data_path)
if base_model == 'ncep':
base_pattern = 'ncep'
elif base_model == 'era5':
base_pattern = 'era5'
else:
raise ValueError('Unrecognized base model:', base_model)
if trials == True:
trials_ext = 'v1'
elif trials == False:
trials_ext = 'v2'
else:
raise ValueError("Unrecognized trials extentions:", trials)
filename_pattern = base_pattern + '*' + trials_ext + '.csv'
# [print(f) for f in path.rglob(filename_pattern)]
# [print(pd.read_csv(f, index_col=0).columns.shape) for f in path.rglob(filename_pattern)]
df_from_each_file = [pd.read_csv(f, index_col=0) for f in path.rglob(filename_pattern)]
results_df = pd.concat(df_from_each_file, ignore_index=True)
return results_df
def post_processing_compare(df: pd.DataFrame)-> pd.DataFrame:
# divide by the spatial resolution
df['mi'] = df['mi'] / (df['spatial'] ** 2)
return df
data_path = f"/home/emmanuel/projects/2020_rbig_rs/data/climate/results/amip/global/compare/"
results_df = get_results_files('ncep', True)
results_df.tail()
results_df.cmip.unique().tolist()
results_df = get_results_files('ncep', True)
plot_global_mutual_info(results_df, base='ncep', cmip='giss_e2_r', measure='mi', normalized=True, log_mi=True)
results_df = get_results_files('ncep', True)
plot_global_mutual_info(results_df, base='ncep', cmip=None, measure='mi', normalized=True, )
results_df = get_results_files('era5', True)
plot_global_mutual_info(results_df, base='era5', cmip=None, measure='mi', normalized=True, log_mi=True)
for imodel in results_df.cmip.unique().tolist():
results_df = get_results_files('ncep', True)
plot_global_mutual_info(results_df, base='ncep', cmip=imodel, measure='mi', normalized=True, log_mi=True)
for imodel in results_df.cmip.unique().tolist():
results_df = get_results_files('era5', True)
plot_global_mutual_info(results_df, base='era5', cmip=imodel, measure='mi', normalized=True, log_mi=False)
def plot_global_mi(results_df: pd.DataFrame, base: str, cmip: str, measure='mi', normalized=True)-> None:
results_df = results_df[results_df['base'] == base]
results_df = results_df[results_df['cmip'] == cmip]
if normalized and measure == 'mi':
results_df[measure] = results_df[measure] / results_df['spatial']
sns.scatterplot(data=results_df, x='spatial', y=measure, label=f"{base}")
sns.scatterplot(data=results_df, x='spatial', y=measure, label=f"{cmip}")
plt.title(f'{base.upper()}, CMIP: {cmip.upper()}')
plt.xlabel('Spatial Features')
plt.ylabel(f'{measure.upper()}')
plt.legend()
plt.show()
plot_global_mi(results_df, 'ncep', 'access1_0', 'mi')
# plot_global_mi(results_df, 'era5', 'noresm1_m', 'pearson')
# plot_global_mi(results_df, 'era5', 'noresm1_m', 'spearman')
# plot_global_mi(results_df, 'era5', 'noresm1_m', 'kendelltau')
plot_global_mi(results_df, 'ncep', 'giss_e2_r', 'mi')
plot_global_mi(results_df, 'ncep', 'giss_e2_r', 'pearson')
plot_global_mi(results_df, 'ncep', 'giss_e2_r', 'spearman')
plot_global_mi(results_df, 'ncep', 'giss_e2_r', 'kendelltau')
def plot_global_diff_mi(results_df: pd.DataFrame, base: str, measure='mi', normalized=True, log_mi=True)-> None:
results_copy = results_df[results_df['base'] == base]
if normalized == True:
results_copy[measure] = results_copy[measure] / results_copy['spatial']
if log_mi == True and measure == 'mi':
results_copy[measure] = np.log(results_copy[measure])
fig, ax = plt.subplots()
# sns.scatterplot(ax=ax, data=results_copy, x='spatial', y='h_diff', hue='base', color='black')
sns.scatterplot(ax=ax, data=results_copy, x='spatial', y=measure, hue='cmip')
plt.title(f'')
plt.xlabel('Spatial Features')
plt.ylabel(f'{measure.upper()}')
plt.legend(ncol=2, bbox_to_anchor=(2.05, 1), fontsize=16)
# plt.tight_layout()
plt.show()
giss_e2_r
plot_global_diff_mi(results_df, 'ncep', 'mi', True, True)
plot_global_diff_mi(results_df, 'ncep', 'pearson', True, True)
plot_global_diff_mi(results_df, 'ncep', 'spearman', True, True)
plot_global_diff_mi(results_df, 'ncep', 'kendelltau', True, True)