2.1 Regridding

import os, sys
# Put the directory two levels above the current working directory at the
# front of sys.path (presumably the project root, so that the
# `src.models.train_models` import below resolves — confirm against the
# directory the notebook is launched from).
cwd = os.getcwd()
source_path = f"{cwd}/../../"
sys.path.insert(0, f'{source_path}')

import numpy as np

# Import RBIG Helper
from src.models.train_models import run_rbig_models

# ESDC tools
sys.path.insert(0, f'/home/emmanuel/code/py_esdc')
from esdc.standardize import normalize_temporal
from esdc.transform import regrid_data

import cdsapi
from zipfile import ZipFile
import pandas as pd
import xarray as xr
from tqdm import tqdm
from sklearn import preprocessing

import seaborn as sns
import matplotlib.pyplot as plt
plt.style.use('ggplot')
%matplotlib inline

%load_ext autoreload
%autoreload 2
# Hard-coded project directories: raw climate inputs, results, and figures.
data_path = "/home/emmanuel/projects/2020_rbig_rs/data/climate/raw/"
results_path = "/home/emmanuel/projects/2020_rbig_rs/data/climate/results/"
fig_path = "/home/emmanuel/projects/2020_rbig_rs/reports/figures/climate/"

# Open every file matching "*mon.mean.nc" in the raw-data directory as a single
# dataset, rename the 'pres' variable to 'sp', and tag the dataset with a
# model identifier.
ncep_data = (
    xr.open_mfdataset(data_path + "*mon.mean.nc")
    .rename({"pres": "sp"})
    .assign_attrs(model_id="ncar_ncep_doe_2")
)

# Bare expression so the notebook displays the dataset summary.
ncep_data
<xarray.Dataset>
Dimensions:    (lat: 73, lon: 144, nbnds: 2, time: 489)
Coordinates:
  * lat        (lat) float32 90.0 87.5 85.0 82.5 ... -82.5 -85.0 -87.5 -90.0
  * lon        (lon) float32 0.0 2.5 5.0 7.5 10.0 ... 350.0 352.5 355.0 357.5
  * time       (time) datetime64[ns] 1979-01-01 1979-02-01 ... 2019-09-01
Dimensions without coordinates: nbnds
Data variables:
    mslp       (time, lat, lon) float32 dask.array<chunksize=(489, 73, 144), meta=np.ndarray>
    time_bnds  (time, nbnds) datetime64[ns] dask.array<chunksize=(489, 2), meta=np.ndarray>
    pr_wtr     (time, lat, lon) float32 dask.array<chunksize=(489, 73, 144), meta=np.ndarray>
    sp         (time, lat, lon) float32 dask.array<chunksize=(489, 73, 144), meta=np.ndarray>
Attributes:
    Conventions:    CF-1.0
    title:          Monthly NCEP/DOE Reanalysis 2
    history:        created 2002/03 by Hoop (netCDF2.3)
    comments:       Data is from \nNCEP/DOE AMIP-II Reanalysis (Reanalysis-2)...
    platform:       Model
    source:         NCEP/DOE AMIP-II Reanalysis (Reanalysis-2) Model
    institution:    National Centers for Environmental Prediction
    dataset_title:  NCEP-DOE AMIP-II Reanalysis
    References:     https://www.esrl.noaa.gov/psd/data/gridded/data.ncep.rean...
    source_url:     http://www.cpc.ncep.noaa.gov/products/wesley/reanalysis2/
    model_id:       ncar_ncep_doe_2
# Load the ERA5 file and rename its variable/coordinate names
# ('msl', 'latitude', 'longitude') to 'mslp', 'lat', 'lon'.
era5_name_map = {"msl": "mslp", "latitude": "lat", "longitude": "lon"}
era5_data = xr.open_dataset(data_path + "ERA5.nc").rename(era5_name_map)

# Disabled alternative: rescale the model from 0.25 to 2.5 degrees by
# block-averaging instead of regridding.
# era5_data = era5_data.coarsen(lat=10, lon=10, boundary='pad').mean()

# Tag the dataset with a model identifier.
era5_data.attrs["model_id"] = "era5"

# Bare expression so the notebook displays the dataset summary.
era5_data
<xarray.Dataset>
Dimensions:  (lat: 721, lon: 1440, time: 487)
Coordinates:
  * lon      (lon) float32 0.0 0.25 0.5 0.75 1.0 ... 359.0 359.25 359.5 359.75
  * lat      (lat) float32 90.0 89.75 89.5 89.25 ... -89.25 -89.5 -89.75 -90.0
  * time     (time) datetime64[ns] 1979-01-01 1979-02-01 ... 2019-07-01
Data variables:
    mslp     (time, lat, lon) float32 ...
    sp       (time, lat, lon) float32 ...
Attributes:
    Conventions:  CF-1.6
    history:      2019-10-07 09:20:10 GMT by grib_to_netcdf-2.10.0: /opt/ecmw...
    model_id:     era5
import xesmf as xe


def _cell_bounds(centers, lower=None, upper=None):
    """Return the ``n + 1`` cell edges for ``n`` 1-D cell centres.

    Interior edges are the midpoints between neighbouring centres; the two
    outer edges are extrapolated by half of the adjacent spacing. Works for
    ascending or descending centres.

    Parameters
    ----------
    centers : array-like, shape (n,), n >= 2
        Cell-centre coordinates.
    lower, upper : float, optional
        If given, edges are clipped to this range (used to keep latitude
        edges inside [-90, 90]).

    Returns
    -------
    numpy.ndarray, shape (n + 1,)
    """
    centers = np.asarray(centers, dtype=np.float64)
    if centers.ndim != 1 or centers.size < 2:
        raise ValueError("need a 1-D array with at least two cell centres")
    half_steps = np.diff(centers) / 2.0
    edges = np.concatenate((
        [centers[0] - half_steps[0]],
        centers[:-1] + half_steps,
        [centers[-1] + half_steps[-1]],
    ))
    if lower is not None or upper is not None:
        edges = np.clip(edges, lower, upper)
    return edges


def _grid_with_bounds(ds):
    """Build a grid-only dataset (lat, lon, lat_b, lon_b) from ``ds``.

    ``ds`` must expose 1-D ``lat`` and ``lon`` cell-centre coordinates.
    """
    return xr.Dataset(
        {
            "lat": (["lat"], np.asarray(ds.lat)),
            "lon": (["lon"], np.asarray(ds.lon)),
            # Latitude edges extrapolated past the poles are clipped back.
            "lat_b": (["lat_b"], _cell_bounds(ds.lat, -90.0, 90.0)),
            "lon_b": (["lon_b"], _cell_bounds(ds.lon)),
        }
    )


# Conservative regridding needs the cell corners (`lon_b` / `lat_b`) of both
# grids; passing datasets that only carry cell centres raises
# KeyError: 'lon_b' inside xESMF. Derive the corners from the centres.
#
# Source grid: ERA5. Target grid: the NCEP grid (the original cell passed
# `ncep_data` as the source as well, which would only map the NCEP grid onto
# itself and could not be applied to the ERA5 fields).
era5_grid = _grid_with_bounds(era5_data)
era5_data_regrid = _grid_with_bounds(ncep_data)

method = 'conservative'
regridder = xe.Regridder(
    era5_grid,
    era5_data_regrid,
    method,
    reuse_weights=True
)
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
~/.conda/envs/2019_rbig_ad/lib/python3.6/site-packages/xarray/core/dataset.py in _construct_dataarray(self, name)
   1150         try:
-> 1151             variable = self._variables[name]
   1152         except KeyError:

KeyError: 'lon_b'

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
<ipython-input-9-73cf5d2042a7> in <module>
      4     era5_data_regrid,
      5     method,
----> 6     reuse_weights=True
      7 )

~/.conda/envs/2019_rbig_ad/lib/python3.6/site-packages/xesmf/frontend.py in __init__(self, ds_in, ds_out, method, periodic, filename, reuse_weights)
    135         self._grid_in, shape_in = ds_to_ESMFgrid(ds_in,
    136                                                  need_bounds=self.need_bounds,
--> 137                                                  periodic=periodic
    138                                                  )
    139         self._grid_out, shape_out = ds_to_ESMFgrid(ds_out,

~/.conda/envs/2019_rbig_ad/lib/python3.6/site-packages/xesmf/frontend.py in ds_to_ESMFgrid(ds, need_bounds, periodic, append)
     65 
     66     if need_bounds:
---> 67         lon_b = np.asarray(ds['lon_b'])
     68         lat_b = np.asarray(ds['lat_b'])
     69         lon_b, lat_b = as_2d_mesh(lon_b, lat_b)

~/.conda/envs/2019_rbig_ad/lib/python3.6/site-packages/xarray/core/dataset.py in __getitem__(self, key)
   1241 
   1242         if hashable(key):
-> 1243             return self._construct_dataarray(key)
   1244         else:
   1245             return self._copy_listed(np.asarray(key))

~/.conda/envs/2019_rbig_ad/lib/python3.6/site-packages/xarray/core/dataset.py in _construct_dataarray(self, name)
   1152         except KeyError:
   1153             _, name, variable = _get_virtual_variable(
-> 1154                 self._variables, name, self._level_coords, self.dims
   1155             )
   1156 

~/.conda/envs/2019_rbig_ad/lib/python3.6/site-packages/xarray/core/dataset.py in _get_virtual_variable(variables, key, level_vars, dim_sizes)
    144         ref_var = dim_var.to_index_variable().get_level_variable(ref_name)
    145     else:
--> 146         ref_var = variables[ref_name]
    147 
    148     if var_name is None:

KeyError: 'lon_b'