
Visually Comparing Climate Models


Summary

In this notebook, I will be comparing three climate data products (two reanalyses and one model ensemble):

  • NCEP-DOE Reanalysis 2: Surface
  • ERA5
  • CMIP5

I will be looking at the following variables:

  • Mean Sea Level Pressure (CMIP5, ERA5, NCEP)
  • Surface Pressure (ERA5, NCEP)

I will be using RBIG (Rotation-Based Iterative Gaussianization) to assess how similar these models are, via the information-theoretic (IT) measures listed in the Measures section below. If these climate models really are similar, they should exhibit similar IT measures.


Preprocessing Steps

Regridded Spatially

  • The ERA5 had the coarsest spatial resolution (2.5° x 2.5°), so I used its grid as the common target.
  • I regridded the NCEP from (0.25° x 0.25°) to (2.5° x 2.5°).
  • I regridded the CMIP5 from (2° x 2.5°) to (2.5° x 2.5°); a regridding sketch follows this list.
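
A minimal sketch of this regridding step, assuming each field is an xarray DataArray with lat/lon coordinates; the bilinear xr.DataArray.interp call here stands in for whatever regridder (e.g. xESMF) was actually used:

import xarray as xr

def regrid_to(da: xr.DataArray, target: xr.DataArray) -> xr.DataArray:
    """Bilinearly interpolate `da` onto the lat/lon grid of `target`."""
    return da.interp(lat=target.lat, lon=target.lon, method="linear")

# bring NCEP and CMIP5 onto the 2.5 x 2.5 ERA5 grid (placeholder variable names)
# ncep_25 = regrid_to(ncep_da, era5_da)
# cmip_25 = regrid_to(cmip_da, era5_da)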

Temporal Resolution

  • ERA5 and NCEP span 1980-2019.
  • CMIP5 spans 2006-2018.
  • For the ERA5 vs CMIP5 and NCEP vs CMIP5 comparisons, I restricted each pair to their overlapping time period (see the sketch below).
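
A minimal sketch of the temporal alignment, assuming each dataset is an xarray object with a time coordinate (the variable names are placeholders):

# CMIP5 only covers 2006-2018, so keep only that window when comparing against it
era5_common = era5_ds.sel(time=slice("2006-01-01", "2018-12-31"))
ncep_common = ncep_ds.sel(time=slice("2006-01-01", "2018-12-31"))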


Measures

I'm measuring the following:

  • Entropy - expected uncertainty
  • Total Correlation - amount of redundant information between features
  • Mutual Information - amount of information shared between variables
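
For reference, with a dataset X made of features X_1, \ldots, X_D, these are the standard definitions (all reported in nats):

H(X) = -\mathbb{E}_{p(x)}\left[ \log p(x) \right]

TC(X) = \sum_{d=1}^{D} H(X_d) - H(X)

I(X;Y) = H(X) + H(Y) - H(X,Y)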


Data

Inputs

I take each year as-is: each spatial location is a sample and each month of that year is a feature. So for a given year, my input is:

X \in \mathbb{R}^{\text{spatial} \times \text{month}}
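
A minimal sketch of building this matrix from a monthly (time, lat, lon) field, assuming the data sit in an xarray DataArray called da (the variable names and the example year are placeholders):

# select one year of monthly fields, then flatten lat/lon into samples
year_da = da.sel(time=da.time.dt.year == 2010)     # shape: (12, lat, lon)
X = year_da.stack(samples=("lat", "lon"))          # shape: (12, lat*lon)
X = X.transpose("samples", "time").values          # shape: (lat*lon, 12) -> samples x features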

Outputs

All my information theory measures are in nats. They are scalars.
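(Values in nats convert to bits by dividing by \log 2, i.e. H_{\text{bits}} = H_{\text{nats}} / \log 2.)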


Hypothesis

Simple: ERA5 and NCEP should be more similar to each other than either of them is to CMIP5.


Data - Climate Models

!ls /home/emmanuel/projects/2020_rbig_rs/data/climate/results/amip/local/compare/
era5_access1_0_v1.csv    era5_ipsl_cm5a_lr_v2.csv  ncep_cnrm_cm5_v1.csv
era5_access1_0_v2.csv     era5_mpi_esm_lr_v1.csv    ncep_cnrm_cm5_v2.csv
era5_bcc_csm1_1_v1.csv    era5_mpi_esm_lr_v2.csv    ncep_giss_e2_r_v1.csv
era5_bcc_csm1_1_v2.csv    era5_noresm1_m_v1.csv     ncep_giss_e2_r_v2.csv
era5_bnu_esm_v1.csv   era5_noresm1_m_v2.csv     ncep_ipsl_cm5a_lr_v1.csv
era5_bnu_esm_v2.csv   ncep_access1_0_v1.csv     ncep_ipsl_cm5a_lr_v2.csv
era5_cnrm_cm5_v1.csv      ncep_access1_0_v2.csv     ncep_mpi_esm_lr_v1.csv
era5_cnrm_cm5_v2.csv      ncep_bcc_csm1_1_v1.csv    ncep_mpi_esm_lr_v2.csv
era5_giss_e2_r_v1.csv     ncep_bcc_csm1_1_v2.csv    ncep_noresm1_m_v1.csv
era5_giss_e2_r_v2.csv     ncep_bnu_esm_v1.csv       ncep_noresm1_m_v2.csv
era5_ipsl_cm5a_lr_v1.csv  ncep_bnu_esm_v2.csv
import os, sys
cwd = os.getcwd()
source_path = f"{cwd}/../../../"
sys.path.insert(0, f'{source_path}')

# ESDC tools
sys.path.insert(0, f'/home/emmanuel/code/py_esdc')
# from esdc.preprocessing import normalize_temporal
from pathlib import Path


import cdsapi
from zipfile import ZipFile
import pandas as pd
import xarray as xr
from tqdm import tqdm
from sklearn import preprocessing

# Visualization Tools
from src.data.climate.loader import ResultsLoader   # used in the Results section below
from src.visualization.climate import PlotResults
from src.visualization.climate.compare import plot_individual, plot_all

import seaborn as sns
import matplotlib.pyplot as plt
# plt.style.use('ggplot')
plt.style.use(['seaborn-poster', 'fivethirtyeight'])

%matplotlib inline

%load_ext autoreload
%autoreload 2

Experiment I - Local

data_path = f"/home/emmanuel/projects/2020_rbig_rs/data/climate/results/amip/local/compare/"

def get_results_files(base_model: str, trials: bool) -> pd.DataFrame:

    path = Path(data_path)

    if base_model in ('ncep', 'era5'):
        base_pattern = base_model
    else:
        raise ValueError('Unrecognized base model:', base_model)

    # trials=True selects the 'v1' result files, trials=False the 'v2' files
    trials_ext = 'v1' if trials else 'v2'

    filename_pattern = base_pattern + '*' + trials_ext + '.csv'

    # read every matching results file and concatenate into a single dataframe
    df_from_each_file = [pd.read_csv(f, index_col=0) for f in path.rglob(filename_pattern)]
    results_df = pd.concat(df_from_each_file, ignore_index=True)
    return results_df
def post_processing_compare(df: pd.DataFrame) -> pd.DataFrame:

    # normalize MI by the square of the spatial resolution
    df['mi'] = df['mi'] / (df['spatial'] ** 2)

    return df

NCEP - Individual IT Measures

data_path = f"/home/emmanuel/projects/2020_rbig_rs/data/climate/results/amip/local/compare/"
fig_path = f"/home/emmanuel/projects/2020_rbig_rs/reports/figures/climate/amip/local/compare/"
# extract results
results_df = get_results_files('ncep', trials=False)

# post processing
results_df = post_processing_compare(results_df)
results_df.head()
base base_time cmip cmip_time kendelltau mi pearson spatial spearman subsample time_mi trial variable
0 ncep 1979-02-01 cnrm_cm5 1979-01-16 12:00:00 0.404668 93.762735 0.472259 1.0 0.560047 50000.0 462.748412 0.0 psl
1 ncep 1979-03-01 cnrm_cm5 1979-02-15 00:00:00 0.581296 93.383035 0.706488 1.0 0.774481 50000.0 461.874625 0.0 psl
2 ncep 1979-04-01 cnrm_cm5 1979-03-16 12:00:00 0.633132 94.526655 0.852317 1.0 0.814823 50000.0 462.157740 0.0 psl
3 ncep 1979-05-01 cnrm_cm5 1979-04-16 00:00:00 0.642649 93.431233 0.848718 1.0 0.809581 50000.0 462.089180 0.0 psl
4 ncep 1979-06-01 cnrm_cm5 1979-05-16 12:00:00 0.582758 93.599717 0.847242 1.0 0.744458 50000.0 462.955628 0.0 psl

We want to extract the different CMIP models. The other parameters are constant for now.

for ispatial in [1.0, 2.0, 3.0, 4.0, 5.0]:
    plot_all(results_df, ispatial, 'pearson')

ERA5 - Individual IT Measures

# extract results
trials = True
results_df = get_results_files('era5', trials=trials)

# post processing
results_df = post_processing_compare(results_df)

for ispatial in [1.0, 2.0, 3.0, 4.0, 5.0]:
    plot_all(results_df, ispatial, 'pearson')

Trials

NCEP - Comparative IT Measures

# extract results
trials = True
results_df = get_results_files('ncep', trials=trials)

# post processing
results_df = post_processing_compare(results_df)

for ispatial in [1.0, 2.0, 3.0, 4.0, 5.0]:
    plot_all(results_df, ispatial, 'pearson')

ERA5 - Comparative IT Measures

# extract results
trials = True
results_df = get_results_files('era5', trials=trials)

# post processing
results_df = post_processing_compare(results_df)

for ispatial in [1.0, 2.0, 3.0, 4.0, 5.0]:
    plot_all(results_df, ispatial, 'pearson')

Results

Mean Sea Level Pressure

CMIP5 vs ERA5 vs NCEP
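
The cells below assume a results_path variable pointing at the directory that holds the global comparison CSVs (e.g. mslp_era_cmip.csv); the path here is only a placeholder sketch to adjust:

# placeholder: point this at the directory containing the global comparison results
results_path = "/path/to/climate/results/"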

variables = [
    'mslp_era_cmip', 
    'mslp_ncep_cmip'
]

filenames = [f"{results_path}{variable}.csv" for variable in variables]

results = ResultsLoader(filenames).load_dataframes()

# initializer plotter
plotter = PlotResults(results)
plotter.results = plotter.results[plotter.results['year'] < 2019]

Entropy

ent1_fig, ent1_ax = plotter.plot_entropy()

Total Correlation

tc1_fig, tc1_ax = plotter.plot_total_correlation()

Mutual Information

This is the MI between CMIP5 and each of the two reanalyses (ERA5 and NCEP).

mi1_fig, mi1_ax = plotter.plot_mutual_information(('model', ['cmip5']))

NCEP vs ERA5

Mean Sea Level Pressure

variables = [
    'mslp_ncep_era'
]

filenames = [f"{results_path}{variable}.csv" for variable in variables]

results = ResultsLoader(filenames).load_dataframes()

# initializer plotter
plotter = PlotResults(results)
plotter.results = plotter.results[plotter.results['year'] < 2017]  

Entropy

plotter.plot_entropy();

Total Correlation

plotter.plot_total_correlation();

Mutual Information

The MI between ERA5 and the NCAR_NCEP_DOE_2 model.

plotter.plot_mutual_information(omit_models=('model', ['ncar_ncep_doe_2']));

Surface Pressure

variables = [
    'sp_ncep_era'
]

filenames = [f"{results_path}{variable}.csv" for variable in variables]

results = ResultsLoader(filenames).load_dataframes()

# initializer plotter
plotter = PlotResults(results)
plotter.results = plotter.results[plotter.results['year'] < 2018]  

Entropy

plotter.plot_entropy();

Total Correlation

plotter.plot_total_correlation();

Mutual Information

The MI between ERA5 and NCEP.

plotter.plot_mutual_information(omit_models=('model', ['ncar_ncep_doe_2']));