Skip to content

Spatial-Temporal Experiment

In this notebook, I will be walking through how we can estimate different methods based on the density cubes that we derive.

import sys, os
from pyprojroot import here
root = here(project_files=[".here"])
sys.path.append(str(here()))

import pathlib

# standard python packages
import xarray as xr
import pandas as pd
import numpy as np

# 
# Experiment Functions
from src.data.esdc import get_dataset
from src.features import Metrics
from sklearn.preprocessing import StandardScaler
from src.models.density import get_rbig_model
from src.features.temporal import select_period, get_smoke_test_time, TimePeriod
from src.features.spatial import select_region, get_europe, get_spain
from src.models.train_models import get_similarity_scores
from src.experiments.utils import dict_product, run_parallel_step

from src.features import Metrics
from src.features.density import get_density_cubes
from src.features.preprocessing import standardizer_data, get_reference_cube, get_common_indices
from src.models.similarity import cka_coefficient, rv_coefficient, rbig_it_measures

# # esdc tools
# from src.esdc.subset import select_pixel
# from src.esdc.shape import ShapeFileExtract, rasterize
# from esdc.transform import DensityCubes

from typing import List, Dict
import xarray as xr

from tqdm import tqdm
import time

import cartopy
import cartopy.crs as ccrs

# NUMPY SETTINGS
import numpy as onp
onp.set_printoptions(precision=3, suppress=True)

# MATPLOTLIB Settings
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# SEABORN SETTINGS
import seaborn as sns
sns.set_context(context='talk',font_scale=0.7)
# sns.set(rc={'figure.figsize': (12, 9.)})
# sns.set_style("whitegrid")

# PANDAS SETTINGS
import pandas as pd
pd.set_option("display.max_rows", 120)
pd.set_option("display.max_columns", 120)

# LOGGING SETTINGS
import sys
import logging
logging.basicConfig(
    level=logging.INFO, 
    stream=sys.stdout,
    format='%(asctime)s: %(levelname)s: %(message)s'
)
logger = logging.getLogger()
#logger.setLevel(logging.INFO)

%load_ext autoreload
%autoreload 2

Results

!ls /media/disk/erc/papers/2020_rbig_eo/results/spa_temp/information/world/resample
# !mkdir /media/disk/erc/papers/2020_rbig_eo/results/spa_temp/information/world/resample
# !mkdir /media/disk/erc/papers/2020_rbig_eo/results/spa_temp/information/world
# !rm /media/disk/erc/papers/2020_rbig_eo/results/spa_temp/information/world/resample/*.csv
# !cat /media/disk/erc/papers/2020_rbig_eo/results/spa_temp/information/world/resample/gpp_2002_2010_s200000_d1112_v2.csv
gpp_2002_2010_s200000_d1112_v1.csv  lst_2002_2010_s200000_d224_v1.csv
gpp_2002_2010_s200000_d111_v1.csv   lst_2002_2010_s200000_d441_v1.csv
gpp_2002_2010_s200000_d224_v1.csv   precip_2002_2010_s200000_d1112_v1.csv
gpp_2002_2010_s200000_d441_v1.csv   precip_2002_2010_s200000_d111_v1.csv
lai_2002_2010_s200000_d1112_v1.csv  precip_2002_2010_s200000_d224_v1.csv
lai_2002_2010_s200000_d111_v1.csv   precip_2002_2010_s200000_d441_v1.csv
lai_2002_2010_s200000_d224_v1.csv   sm_2002_2010_s200000_d1112_v1.csv
lai_2002_2010_s200000_d441_v1.csv   sm_2002_2010_s200000_d111_v1.csv
lst_2002_2010_s200000_d1112_v1.csv  sm_2002_2010_s200000_d224_v1.csv
lst_2002_2010_s200000_d111_v1.csv   sm_2002_2010_s200000_d441_v1.csv
RES_PATH = pathlib.Path(str(root)).joinpath("data/spa_temp/info_earth/world")
SAVE_PATH = pathlib.Path("/media/disk/erc/papers/2020_rbig_eo/results/spa_temp/information/world")
FIG_PATH = pathlib.Path(str(root)).joinpath("reports/figures/spa_temp/demos/infoearth/spain")
!ls $RES_PATH/prob_cubes
# !rm -rf $RES_PATH/prob_cubes/*
v1_world_gpp_2002_2010_s200000_d1112_rs1ms.h5
v1_world_gpp_2002_2010_s200000_d111_rs1ms.h5
v1_world_gpp_2002_2010_s200000_d224_rs1ms.h5
v1_world_gpp_2002_2010_s200000_d332_rs1ms.h5
v1_world_gpp_2002_2010_s200000_d441_rs1ms.h5
v1_world_lai_2002_2010_s200000_d1112_rs1ms.h5
v1_world_lai_2002_2010_s200000_d111_rs1ms.h5
v1_world_lai_2002_2010_s200000_d224_rs1ms.h5
v1_world_lai_2002_2010_s200000_d332_rs1ms.h5
v1_world_lai_2002_2010_s200000_d441_rs1ms.h5
v2_world_gpp_2002_2010_s200000_d1112_rs1ms.h5
v2_world_gpp_2002_2010_s200000_d111_rs1ms.h5
v2_world_gpp_2002_2010_s200000_d221_rs1ms.h5
v2_world_gpp_2002_2010_s200000_d224_rs1ms.h5
v2_world_gpp_2002_2010_s200000_d331_rs1ms.h5
v2_world_gpp_2002_2010_s200000_d332_rs1ms.h5
v2_world_gpp_2002_2010_s200000_d441_rs1ms.h5
v2_world_gpp_2002_2010_s200000_d551_rs1ms.h5
v2_world_gpp_2002_2010_s200000_d661_rs1ms.h5
v2_world_gpp_2002_2010_s200000_d771_rs1ms.h5
v2_world_lai_2002_2010_s200000_d1112_rs1ms.h5
v2_world_lai_2002_2010_s200000_d111_rs1ms.h5
v2_world_lai_2002_2010_s200000_d224_rs1ms.h5
v2_world_lai_2002_2010_s200000_d332_rs1ms.h5
v2_world_lai_2002_2010_s200000_d441_rs1ms.h5
vt_world_gpp_2002_2010_s200000_d111_rs1ms.h5
vt_world_gpp_2002_2010_s200000_d111_rs.h5
vt_world_gpp_2002_2010_s200000_d441_rs.h5
variables = ['rm', 'precip', 'gpp', 'lst', 'lai']

variable = 'gpp'
version = 'v1'
period = '2002_2010'
dimensions = '111'
samples = '200000'
save_name = f"{variable}_{period}_s{samples}_d{dimensions}_{version}"

def load_world_cubes_rs(
    variable: str='gpp', 
    period: str='2002_2010', 
    samples: str='200000',
    dimensions: str='111',
    version: str='v1',
    split: bool=True,
):
    if split:
        filename_north = f"{version}_north_{variable}_{period}_s{samples}_d{dimensions}"
        filename_south = f"{version}_south_{variable}_{period}_s{samples}_d{dimensions}"

        prob_cubes_n = xr.open_dataset(RES_PATH.joinpath(f"prob_cubes/{filename_north}.h5"))
        prob_cubes_s = xr.open_dataset(RES_PATH.joinpath(f"prob_cubes/{filename_south}.h5"))
        prob_cubes = xr.merge([prob_cubes_n, prob_cubes_s])
    else:
        filename = f"{version}_world_{variable}_{period}_s{samples}_d{dimensions}"
        prob_cubes = xr.open_dataset(RES_PATH.joinpath(f"prob_cubes/{filename}.h5"))

    return prob_cubes
# variables = ['gpp', 'sm', 'lai', 'lst', 'precip']
# dimensions = ['111', '224', '441', '1112']
# version = 'v1'
# period = '2002_2010'
# samples = '200000'

# for ivariable in variables:
#     for idim in dimensions:
#         t = load_world_cubes_rs(variable=ivariable, period=period, samples=samples, version=version, dimensions=idim, split=False)
#         save_name = f"{ivariable}_{period}_s{samples}_d{idim}_{version}"
#         t.to_dataframe().dropna().to_csv(SAVE_PATH.joinpath(f"resample/{save_name}.csv"))
!ls $RES_PATH/prob_cubes
v1_world_gpp_2002_2010_s200000_d1112_rs1ms.h5
v1_world_gpp_2002_2010_s200000_d111_rs1ms.h5
v1_world_gpp_2002_2010_s200000_d224_rs1ms.h5
v1_world_gpp_2002_2010_s200000_d332_rs1ms.h5
v1_world_gpp_2002_2010_s200000_d441_rs1ms.h5
v1_world_gpp_2002_2010_s500000_d1112_rs1ms.h5
v1_world_gpp_2002_2010_s500000_d1112_rs1MS.h5
v1_world_gpp_2002_2010_s500000_d224_rs1ms.h5
v1_world_gpp_2002_2010_s500000_d224_rs1MS.h5
v1_world_gpp_2002_2010_s500000_d332_rs1ms.h5
v1_world_gpp_2002_2010_s500000_d332_rs1MS.h5
v1_world_gpp_2002_2010_s500000_d441_rs1ms.h5
v1_world_gpp_2002_2010_s500000_d441_rs1MS.h5
v1_world_gpp_2002_2010_s500000_d551_rs1MS.h5
v1_world_lai_2002_2010_s200000_d1112_rs1ms.h5
v1_world_lai_2002_2010_s200000_d111_rs1ms.h5
v1_world_lai_2002_2010_s200000_d224_rs1ms.h5
v1_world_lai_2002_2010_s200000_d332_rs1ms.h5
v1_world_lai_2002_2010_s200000_d441_rs1ms.h5
v1_world_lai_2002_2010_s500000_d1112_rs1MS.h5
v1_world_lai_2002_2010_s500000_d224_rs1MS.h5
v1_world_lai_2002_2010_s500000_d332_rs1MS.h5
v1_world_lai_2002_2010_s500000_d336_rs1MS.h5
v1_world_lai_2002_2010_s500000_d441_rs1MS.h5
v1_world_lai_2002_2010_s500000_d551_rs1MS.h5
v1_world_lai_2002_2010_s500000_d661_rs1MS.h5
v1_world_lai_2002_2010_s500000_d771_rs1MS.h5
v2_world_gpp_2002_2010_s200000_d1112_rs1ms.h5
v2_world_gpp_2002_2010_s200000_d111_rs1ms.h5
v2_world_gpp_2002_2010_s200000_d221_rs1ms.h5
v2_world_gpp_2002_2010_s200000_d224_rs1ms.h5
v2_world_gpp_2002_2010_s200000_d331_rs1ms.h5
v2_world_gpp_2002_2010_s200000_d332_rs1ms.h5
v2_world_gpp_2002_2010_s200000_d441_rs1ms.h5
v2_world_gpp_2002_2010_s200000_d551_rs1ms.h5
v2_world_gpp_2002_2010_s200000_d661_rs1ms.h5
v2_world_gpp_2002_2010_s200000_d771_rs1ms.h5
v2_world_gpp_2002_2010_s500000_d1112_rs1ms.h5
v2_world_gpp_2002_2010_s500000_d224_rs1ms.h5
v2_world_gpp_2002_2010_s500000_d332_rs1ms.h5
v2_world_gpp_2002_2010_s500000_d336_rs1ms.h5
v2_world_gpp_2002_2010_s500000_d551_rs1ms.h5
v2_world_gpp_2002_2010_s500000_d661_rs1ms.h5
v2_world_gpp_2002_2010_s500000_d771_rs1ms.h5
v2_world_lai_2002_2010_s200000_d1112_rs1ms.h5
v2_world_lai_2002_2010_s200000_d111_rs1ms.h5
v2_world_lai_2002_2010_s200000_d224_rs1ms.h5
v2_world_lai_2002_2010_s200000_d332_rs1ms.h5
v2_world_lai_2002_2010_s200000_d441_rs1ms.h5
v2_world_lai_2002_2010_s500000_d1112_rs1ms.h5
v2_world_lai_2002_2010_s500000_d224_rs1ms.h5
v2_world_lai_2002_2010_s500000_d332_rs1ms.h5
v2_world_lai_2002_2010_s500000_d336_rs1ms.h5
v2_world_lai_2002_2010_s500000_d441_rs1ms.h5
v2_world_lai_2002_2010_s500000_d551_rs1ms.h5
v2_world_lai_2002_2010_s500000_d661_rs1ms.h5
v2_world_lai_2002_2010_s500000_d771_rs1ms.h5
v3_world_gpp_2002_2010_s500000_d1112_rs1ms.h5
v3_world_gpp_2002_2010_s500000_d1112_rs1MS.h5
v3_world_gpp_2002_2010_s500000_d224_rs1ms.h5
v3_world_gpp_2002_2010_s500000_d224_rs1MS.h5
v3_world_gpp_2002_2010_s500000_d332_rs1ms.h5
v3_world_gpp_2002_2010_s500000_d336_rs1ms.h5
v3_world_gpp_2002_2010_s500000_d336_rs1MS.h5
v3_world_gpp_2002_2010_s500000_d441_rs1MS.h5
v3_world_gpp_2002_2010_s500000_d551_rs1MS.h5
v3_world_gpp_2002_2010_s500000_d661_rs1ms.h5
v3_world_gpp_2002_2010_s500000_d661_rs1MS.h5
v3_world_gpp_2002_2010_s500000_d771_rs1ms.h5
v3_world_gpp_2002_2010_s500000_d771_rs1MS.h5
v3_world_lai_2002_2010_s500000_d1112_rs1ms.h5
v3_world_lai_2002_2010_s500000_d1112_rs1MS.h5
v3_world_lai_2002_2010_s500000_d224_rs1ms.h5
v3_world_lai_2002_2010_s500000_d224_rs1MS.h5
v3_world_lai_2002_2010_s500000_d332_rs1ms.h5
v3_world_lai_2002_2010_s500000_d332_rs1MS.h5
v3_world_lai_2002_2010_s500000_d336_rs1ms.h5
v3_world_lai_2002_2010_s500000_d336_rs1MS.h5
v3_world_lai_2002_2010_s500000_d441_rs1ms.h5
v3_world_lai_2002_2010_s500000_d441_rs1MS.h5
v3_world_lai_2002_2010_s500000_d551_rs1ms.h5
v3_world_lai_2002_2010_s500000_d551_rs1MS.h5
v3_world_lai_2002_2010_s500000_d661_rs1ms.h5
v3_world_lai_2002_2010_s500000_d661_rs1MS.h5
v3_world_lai_2002_2010_s500000_d771_rs1ms.h5
v3_world_lai_2002_2010_s500000_d771_rs1MS.h5
vt_world_gpp_2002_2010_s200000_d111_rs1ms.h5
vt_world_gpp_2002_2010_s200000_d111_rs.h5
vt_world_gpp_2002_2010_s200000_d441_rs.h5
prob_cubes = xr.open_dataset(RES_PATH.joinpath(f"prob_cubes/v3_world_gpp_2002_2010_s500000_d441_rs1MS.h5"))
# prob_cubes_s = xr.open_dataset(RES_PATH.joinpath(f"prob_cubes/v1_south_gpp_2002_2010_s200000_d441.h5"))
# prob_cubes = xr.merge([prob_cubes_n, prob_cubes_s])
prob_cubes
Show/Hide data repr Show/Hide attributes
xarray.Dataset
    • lat: 527
    • lon: 1363
    • lat
      (lat)
      float64
      -55.12 -54.88 ... 76.12 76.38
      array([-55.125, -54.875, -54.625, ...,  75.875,  76.125,  76.375])
    • lon
      (lon)
      float64
      -179.4 -179.1 ... 179.4 179.6
      array([-179.375, -179.125, -178.875, ...,  179.125,  179.375,  179.625])
    • probability
      (lat, lon)
      float64
      ...
      [718301 values with dtype=float64]
  • variable :
    gross_primary_productivity
    region :
    world
    period :
    ['2002_2010', 'Jan-2002', 'Dec-2010']
    input_shape :
    [500000 16]
    rbig_fit_time :
    915.3726491928101
    prob_size :
    [18954756 16]
    rbig_predict_time :
    357.9922869205475
prob_cubes.where(prob_cubes.probability < np.inf)
Show/Hide data repr Show/Hide attributes
xarray.Dataset
    • lat: 527
    • lon: 1363
    • lat
      (lat)
      float64
      -55.12 -54.88 ... 76.12 76.38
      array([-55.125, -54.875, -54.625, ...,  75.875,  76.125,  76.375])
    • lon
      (lon)
      float64
      -179.4 -179.1 ... 179.4 179.6
      array([-179.375, -179.125, -178.875, ...,  179.125,  179.375,  179.625])
    • probability
      (lat, lon)
      float64
      nan nan nan nan ... nan nan nan nan
      array([[nan, nan, nan, ..., nan, nan, nan],
             [nan, nan, nan, ..., nan, nan, nan],
             [nan, nan, nan, ..., nan, nan, nan],
             ...,
             [nan, nan, nan, ..., nan, nan, nan],
             [nan, nan, nan, ..., nan, nan, nan],
             [nan, nan, nan, ..., nan, nan, nan]])
  • variable :
    gross_primary_productivity
    region :
    world
    period :
    ['2002_2010', 'Jan-2002', 'Dec-2010']
    input_shape :
    [500000 16]
    rbig_fit_time :
    446.3563392162323
    prob_size :
    [18954756 16]
    rbig_predict_time :
    3213.8422796726227
for idims in ['441', '224', '1112']:
# for idims in ['771', '336', '1112']:
    save_name = f"v1_world_gpp_2002_2010_s500000_d{idims}_rs1MS.h5"
    with xr.open_dataset(RES_PATH.joinpath(f"prob_cubes/{save_name}")) as prob_cubes:

        plt.figure(figsize=(10, 5))
        prob_cubes
        prob_cubes.where(prob_cubes.probability < np.inf).probability.plot(robust=True, cmap='Reds')
        plt.savefig(SAVE_PATH.joinpath(f"figures/{save_name}.png"))
for idims in ['111', '224', '332', '441', '1112']:
    save_name = f"v1_world_gpp_2002_2010_s200000_d{idims}_rs1ms.h5"
    with xr.open_dataset(RES_PATH.joinpath(f"prob_cubes/{save_name}")) as prob_cubes:

        plt.figure(figsize=(10, 5))
        prob_cubes.probability.plot.pcolormesh(robust=True, cmap='Reds')
        plt.savefig(SAVE_PATH.joinpath(f"figures/{save_name}.png"))
prob_cubes.lat.min(), prob_cubes.lat.max(), prob_cubes.lon.min(), prob_cubes.lon.max()
(<xarray.DataArray 'lat' ()>
 array(-55.625),
 <xarray.DataArray 'lat' ()>
 array(79.875),
 <xarray.DataArray 'lon' ()>
 array(-179.875),
 <xarray.DataArray 'lon' ()>
 array(179.875))
np.sum(prob_cubes.probability * np.log(1 / prob_cubes.probability))
Show/Hide data repr Show/Hide attributes
xarray.DataArray
'probability'
  • -1.676e+06
    array(-1676234.603)
    !ls $RES_PATH/prob_cubes
    
    v1_world_gpp_2002_2010_s200000_d1112_rs1ms.h5
    v1_world_gpp_2002_2010_s200000_d111_rs1ms.h5
    v1_world_gpp_2002_2010_s200000_d224_rs1ms.h5
    v1_world_gpp_2002_2010_s200000_d332_rs1ms.h5
    v1_world_gpp_2002_2010_s200000_d441_rs1ms.h5
    v1_world_lai_2002_2010_s200000_d1112_rs1ms.h5
    v1_world_lai_2002_2010_s200000_d111_rs1ms.h5
    v1_world_lai_2002_2010_s200000_d224_rs1ms.h5
    v1_world_lai_2002_2010_s200000_d332_rs1ms.h5
    v1_world_lai_2002_2010_s200000_d441_rs1ms.h5
    v2_world_gpp_2002_2010_s200000_d1112_rs1ms.h5
    v2_world_gpp_2002_2010_s200000_d111_rs1ms.h5
    v2_world_gpp_2002_2010_s200000_d221_rs1ms.h5
    v2_world_gpp_2002_2010_s200000_d224_rs1ms.h5
    v2_world_gpp_2002_2010_s200000_d331_rs1ms.h5
    v2_world_gpp_2002_2010_s200000_d332_rs1ms.h5
    v2_world_gpp_2002_2010_s200000_d441_rs1ms.h5
    v2_world_lai_2002_2010_s200000_d1112_rs1ms.h5
    v2_world_lai_2002_2010_s200000_d111_rs1ms.h5
    v2_world_lai_2002_2010_s200000_d224_rs1ms.h5
    v2_world_lai_2002_2010_s200000_d332_rs1ms.h5
    v2_world_lai_2002_2010_s200000_d441_rs1ms.h5
    vt_world_gpp_2002_2010_s200000_d111_rs1ms.h5
    vt_world_gpp_2002_2010_s200000_d111_rs.h5
    vt_world_gpp_2002_2010_s200000_d441_rs.h5
    
    # plt.figure(figsize=(10,5))
    # prob_cubes.shannon_info.plot(robust=True, cmap='Reds')
    prob_cubes = xr.open_dataset(RES_PATH.joinpath(f"prob_cubes/v2_world_gpp_2002_2010_s200000_d331_rs1ms.h5"))
    
    plt.figure(figsize=(10, 5))
    prob_cubes.probability.plot.pcolormesh(robust=True, cmap='Reds')
    plt.savefig(SAVE_PATH.joinpath(f"figures/{save_name}.png"))
    
    ---------------------------------------------------------------------------
    NameError                                 Traceback (most recent call last)
    <ipython-input-6-21054266051b> in <module>
          5 plt.figure(figsize=(10, 5))
          6 prob_cubes.probability.plot.pcolormesh(robust=True, cmap='Reds')
    ----> 7 plt.savefig(SAVE_PATH.joinpath(f"figures/{save_name}.png"))
    
    NameError: name 'save_name' is not defined
    prob_cubes.probability.plot.hist(bins=100);
    
    # plt.figure(figsize=(10,5))
    # prob_cubes.shannon_info.plot(robust=True, cmap='Reds')
    prob_cubes = xr.open_dataset(RES_PATH.joinpath(f"prob_cubes/v2_world_gpp_2002_2010_s200000_d111_rs1ms.h5"))
    
    plt.figure(figsize=(10,5))
    prob_cubes.probability.plot.pcolormesh(robust=True, cmap='Reds')
    plt.savefig(SAVE_PATH.joinpath(f"figures/{save_name}.png"))
    
    plt.figure(figsize=(10,5))
    prob_cubes.shannon_info.plot(robust=True, cmap='Reds')
    
    plt.figure(figsize=(10,5))
    prob_cubes.probability.plot(robust=True, cmap='Reds')
    
    <matplotlib.collections.QuadMesh at 0x7ff92faa44f0>
    plt.figure()
    prob_cubes.shannon_info.plot(robust=True, cmap='Reds')
    
    plt.figure()
    prob_cubes.probability.plot(robust=True, cmap='Reds')
    
    <matplotlib.collections.QuadMesh at 0x7ff930194c10>
    prob_cubes_n = xr.open_dataset(RES_PATH.joinpath(f"prob_cubes/v2_north_lst_2002_2010_s200000_d111.h5"))
    prob_cubes_s = xr.open_dataset(RES_PATH.joinpath(f"prob_cubes/v2_south_lst_2002_2010_s200000_d111.h5"))
    prob_cubes_2 = xr.merge([prob_cubes_n, prob_cubes_s])
    prob_cubes_2
    
    Show/Hide data repr Show/Hide attributes
    xarray.Dataset
      • lat: 672
      • lon: 1439
      • lat
        (lat)
        float64
        -83.88 -83.62 ... 83.62 83.88
        array([-83.875, -83.625, -83.375, ...,  83.375,  83.625,  83.875])
      • lon
        (lon)
        float64
        -179.9 -179.6 ... 179.4 179.6
        array([-179.875, -179.625, -179.375, ...,  179.125,  179.375,  179.625])
      • probability
        (lat, lon)
        float64
        nan nan nan ... 0.3533 0.3714
        array([[  nan,   nan,   nan, ...,   nan,   nan,   nan],
               [  nan,   nan,   nan, ...,   nan,   nan,   nan],
               [  nan,   nan,   nan, ...,   nan,   nan,   nan],
               ...,
               [0.394, 0.394, 0.391, ..., 0.405, 0.397, 0.395],
               [0.376, 0.375, 0.384, ..., 0.392, 0.398, 0.394],
               [0.371, 0.366, 0.377, ..., 0.343, 0.353, 0.371]])
      • shannon_info
        (lat, lon)
        float64
        nan nan nan ... 1.07 1.04 0.9906
        array([[  nan,   nan,   nan, ...,   nan,   nan,   nan],
               [  nan,   nan,   nan, ...,   nan,   nan,   nan],
               [  nan,   nan,   nan, ...,   nan,   nan,   nan],
               ...,
               [0.93 , 0.931, 0.939, ..., 0.903, 0.925, 0.928],
               [0.978, 0.98 , 0.957, ..., 0.937, 0.921, 0.932],
               [0.993, 1.005, 0.975, ..., 1.07 , 1.04 , 0.991]])
    
    
    plt.figure()
    prob_cubes_2.shannon_info.plot(robust=True, cmap='Reds')
    
    plt.figure()
    prob_cubes_2.probability.plot(robust=True, cmap='Reds')
    
    <matplotlib.collections.QuadMesh at 0x7fe799a73640>
    plt.figure()
    (prob_cubes.shannon_info - prob_cubes_2.shannon_info).plot(robust=True, cmap='Reds')
    
    plt.figure()
    (prob_cubes.probability - prob_cubes_2.probability).plot(robust=True, cmap='Reds')
    
    <matplotlib.collections.QuadMesh at 0x7fe7944a26d0>

    Experiment I - Spain, 2010, Cube 1x1x1

    For this first experiment, I did something very simple and looked at the density of ESDC as samples.

    variables = ['rm', 'precip', 'gpp', 'lst', 'lai']
    
    variable = 'gpp'
    version = 'v1'
    period = '2002_2010'
    dimensions = '1146'
    samples = '200000'
    save_name = f"{variable}_{period}_s{samples}_d{dimensions}_{version}"
    
    def load_world_data(
        variable: str='gpp', 
        period: str='2002_2010', 
        samples: str='200000',
        dimensions: str='1146',
        version: str='v1',
        split: bool=True,
    ):
        if split:
            filename_north = f"{version}_north_{variable}_{period}_s{samples}_d{dimensions}"
            filename_south = f"{version}_south_{variable}_{period}_s{samples}_d{dimensions}"
    
            probs_df = pd.read_csv(str(RES_PATH.joinpath(f'probs/{filename_north}'+".csv")))
            try:
                probs_df2 = pd.read_csv(str(RES_PATH.joinpath(f'probs/{filename_south}'+".csv")))
                probs_df = pd.concat([probs_df,probs_df2])
            except:
                pass
        else:
            filename = f"{version}_world_{variable}_{period}_s{samples}_d{dimensions}"
            probs_df = pd.read_csv(str(RES_PATH.joinpath(f'probs/{filename}'+".csv")))
    
        return probs_df
    
    t = pd.read_csv("/media/disk/erc/papers/2020_rbig_eo/results/spa_temp/information/world/precip_2002_2010_s200000_d111_v1.csv")
    t.set_index(['lat', 'lon']).to_xarray().shannon_info.plot(robust=True)
    
    <matplotlib.collections.QuadMesh at 0x7fd0e28d2d00>
    probs_df = load_world_data()
    
    def convert_df_2_xr(df: pd.Dataframe) -> xr.Dataset:
        return None
    
    # region = "north"
    period = "2002_2010"
    samples = "200000"
    dimensions = "1146"
    variable = 'precip'
    version = 'v1'
    filename_north = f"{version}_north_{variable}_{period}_s{samples}_d{dimensions}"
    filename_south = f"{version}_south_{variable}_{period}_s{samples}_d{dimensions}"
    save_name = f"{variable}_{period}_s{samples}_d{dimensions}_{version}"
    # read csv file
    probs_df = pd.read_csv(str(RES_PATH.joinpath(f'probs/{filename_north}'+".csv")))
    print(probs_df.shape)
    try:
        probs_df2 = pd.read_csv(str(RES_PATH.joinpath(f'probs/{filename_south}'+".csv")))
        probs_df = pd.concat([probs_df,probs_df2])
    except:
        pass
    print(probs_df.shape)
    # convert to datetime
    # probs_df['time'] = pd.to_datetime(probs_df['time'])
    probs_df.to_csv(SAVE_PATH.joinpath(f"{save_name}.csv"))
    
    # create dataframe in the format for xarray
    probs_df = probs_df.set_index(['lat', 'lon']).rename(columns={"0": 'probs'})
    
    # remove probabilities greater than 1
    # probs_df['probs'][probs_df['probs'] >= 1.0] = np.nan
    
    # shannon info
    probs_df['shannon_info'] = - np.log(probs_df['probs'])
    
    # create xarray cubes
    probs_cubes = xr.Dataset.from_dataframe(probs_df)
    probs_cubes
    
    (202725, 3)
    (202725, 3)
    
    Show/Hide data repr Show/Hide attributes
    xarray.Dataset
      • lat: 334
      • lon: 1436
      • lat
        (lat)
        float64
        0.375 0.625 0.875 ... 83.38 83.62
        array([ 0.375,  0.625,  0.875, ..., 83.125, 83.375, 83.625])
      • lon
        (lon)
        float64
        -179.9 -179.6 ... 179.4 179.6
        array([-179.875, -179.625, -179.375, ...,  179.125,  179.375,  179.625])
      • probs
        (lat, lon)
        float64
        nan nan nan nan ... nan nan nan nan
        array([[nan, nan, nan, ..., nan, nan, nan],
               [nan, nan, nan, ..., nan, nan, nan],
               [nan, nan, nan, ..., nan, nan, nan],
               ...,
               [nan, nan, nan, ..., nan, nan, nan],
               [nan, nan, nan, ..., nan, nan, nan],
               [nan, nan, nan, ..., nan, nan, nan]])
      • shannon_info
        (lat, lon)
        float64
        nan nan nan nan ... nan nan nan nan
        array([[nan, nan, nan, ..., nan, nan, nan],
               [nan, nan, nan, ..., nan, nan, nan],
               [nan, nan, nan, ..., nan, nan, nan],
               ...,
               [nan, nan, nan, ..., nan, nan, nan],
               [nan, nan, nan, ..., nan, nan, nan],
               [nan, nan, nan, ..., nan, nan, nan]])
    from typing import Optional
    def plot_map(xr_data, measure: str, save_name: Optional[str]=None):
        fig, ax = plt.subplots()
    
        if measure == 'probs':
            xr_data.probability.plot(
                ax=ax,
                vmin=0, robust=True, cmap='Reds', 
                cbar_kwargs={'label': "Probability"}
            )
        elif measure == 'info':
            xr_data.shannon_info.plot(
                ax=ax,
                vmin=0, robust=True, cmap='Reds', 
                cbar_kwargs={'label': "Shannon Information"}
            )
        else:
            raise ValueError(f"Unrecognized measure: {measure}")
    
        ax.set(
            xlabel='Longitude',
            ylabel='Latitude'
        )
        plt.tight_layout()
        if save_name:
            fig.savefig(FIG_PATH.joinpath(f"{measure}_{save_name}.png"))
    

    Figure I - Probability Maps

    plot_map(prob_cubes, 'probs',  None)
    plot_map(prob_cubes, 'info',  None)
    
    plot_map(probs_cubes, 'probs',  None)
    plot_map(probs_cubes, 'info',  None)
    

    Figure II - Probability Maps, Per Month

    from typing import Optional
    def plot_map(xr_data, measure: str, save_name: Optional[str]=None):
        fig, ax = plt.subplots()
    
        if measure == 'probs':
            xr_data.probs.mean(dim='time').plot(
                ax=ax,
                vmin=0, robust=True, cmap='Reds', 
                cbar_kwargs={'label': "Probability"}
            )
        elif measure == 'info':
            xr_data.shannon_info.mean(dim='time').plot(
                ax=ax,
                vmin=0, robust=True, cmap='Reds', 
                cbar_kwargs={'label': "Shannon Information"}
            )
        else:
            raise ValueError(f"Unrecognized measure: {measure}")
    
        ax.set(
            xlabel='Longitude',
            ylabel='Latitude'
        )
        plt.tight_layout()
        if save_name:
            fig.savefig(FIG_PATH.joinpath(f"{measure}_{save_name}.png"))
    
    
    def plot_ts(xr_data, measure: str, save_name: Optional[str]=None):
        fig, ax = plt.subplots()
    
        if measure == 'probs':
            xr_data.probs.mean(dim=['lon', 'lat']).plot.line(ax=ax, color='black', linewidth=3)
            ylabel = 'Probability'
        elif measure == 'info':
            xr_data.shannon_info.mean(dim=['lon', 'lat']).plot.line(ax=ax, color='black', linewidth=3)
            ylabel = 'Shannon Information'
        else:
            raise ValueError(f"Unrecognized measure: {measure}")
    
        ax.set(
            xlabel='Time',
            ylabel=ylabel
        )
    
        ax.legend(['Mean Predictions'])
        plt.tight_layout()
        if save_name:
            fig.savefig(FIG_PATH.joinpath(f"{measure}_ts_{save_name}.png"))
    
    def plot_ts_error(xr_data, measure: str, save_name: Optional[str]=None):
    
        if measure == 'probs':
            predictions = xr_data.probs.mean(dim=['lat','lon'])
            std = xr_data.probs.std(dim=['lat','lon'])
            ylabel = 'Probabilities'
        elif measure == 'info':
            predictions = xr_data.shannon_info.mean(dim=['lat','lon'])
            std = xr_data.shannon_info.std(dim=['lat','lon'])
            ylabel = 'Shannon Information'
        else:
            raise ValueError(f"Unrecognized measure: {measure}")
    
    
        fig, ax = plt.subplots()
        ax.plot(xr_data.coords['time'].values, predictions)
        ax.fill_between(
            predictions.coords['time'].values,
            predictions - std,
            predictions + std,
            alpha=0.7, color='orange'
        )
        ax.set(
            xlabel='Time',
            ylabel=ylabel,
        )
        ax.legend(['Mean_predictions'])
        plt.tight_layout()
        if save_name:
            fig.savefig(FIG_PATH.joinpath(f"{measure}_ts_err_{save_name}.png"))
    
    def plot_monthly_map(xr_data, measure: str, save_name: Optional[str]=None):
        plt.figure()
    
        if measure == 'probs':
            xr_data.probs.groupby('time.month').mean().plot.pcolormesh(x='lon', y='lat', col='month', col_wrap=3, vmin=0, robust=True, cmap='Reds')
        elif measure == 'info':
            xr_data.shannon_info.groupby('time.month').mean().plot.pcolormesh(x='lon', y='lat', col='month', col_wrap=3, vmin=0, robust=True, cmap='Reds')
        else:
            raise ValueError(f"Unrecognized measure: {measure}")
    
        plt.savefig(FIG_PATH.joinpath(f"monthly_{save_name}.png"))
    
    plot_monthly_map(probs_cubes, 'probs', None)
    plot_monthly_map(probs_cubes, 'info', None)
    
    <Figure size 432x288 with 0 Axes>
    <Figure size 432x288 with 0 Axes>

    Figure 2.1 - Time Series (Probability)

    plot_ts(probs_cubes, 'probs', None)
    plot_ts(probs_cubes, 'info', None)
    

    Figure 2.2 - Time Series w. Variance

    plot_ts_error(probs_cubes, 'probs', None); 
    plot_ts_error(probs_cubes, 'info', None)
    

    Other Plots

    
    
    
    

    Information

    $$ I(\mathbf{X}) = - \log p(\mathbf{X}) $$

    probs_cubes['shannon_info'] = - np.log(probs_cubes.probs) #* np.log(2)
    
    fig, ax = plt.subplots()
    probs_cubes.shannon_info.mean(dim='time').plot(
        ax=ax,
        robust=True, cmap='Reds', 
        cbar_kwargs={'label': "Shannon Information"}
    )
    ax.set(
        xlabel='Longitude',
        ylabel='Latitude',
    )
    plt.tight_layout()
    
    plt.figure(figsize=(10,10))
    probs_cubes.shannon_info.groupby('time.month').mean().plot.pcolormesh(x='lon', y='lat', col='month', col_wrap=3, robust=True, cmap='Reds')
    
    <xarray.plot.facetgrid.FacetGrid at 0x7f8b177c51c0>
    <Figure size 720x720 with 0 Axes>

    Experiment II - Spain, 2010, Cube 2x2x2

    !ls $RES_PATH/probs
    
    gpp_spain_vgpp_r2010_v0.csv      spain_vrm_r2010_v0.csv
    rm_spain_vrm_r2002_2010_v0.csv    spain_vrm_r2010_v0336.csv
    rm_spain_vrm_r2010_v0.csv     spain_vrm_r2010_v0_222.csv
    spain_gpp_2002_2010_v0_222.csv    spain_vsm_r2010_v0_222.csv
    spain_vgpp_r2002_2010_v0_111.csv
    
    probs_df.head()
    
    probs
    time lat lon
    2002-01-05 43.625 -8.125 0.241431
    -7.875 0.258389
    -7.625 0.239796
    -7.375 0.164524
    -6.875 0.339672
    filename = "spain_gpp_2002_2010_v0_222.csv"
    
    # read csv file
    probs_df = pd.read_csv(str(RES_PATH.joinpath(f'probs/{filename}')))
    
    # convert to datetime
    probs_df['time'] = pd.to_datetime(probs_df['time'])
    
    
    # create dataframe in the format for xarray
    probs_df = probs_df.set_index(['time', 'lat', 'lon']).rename(columns={"0": 'probs'})
    
    # remove probabilities greater than 1
    probs_df['probs'][probs_df['probs'] >= 1.0] = np.nan
    
    # create xarray cubes
    probs_cubes = xr.Dataset.from_dataframe(probs_df, )
    probs_cubes
    
    Show/Hide data repr Show/Hide attributes
    xarray.Dataset
      • lat: 30
      • lon: 49
      • time: 413
      • time
        (time)
        datetime64[ns]
        2002-01-13 ... 2010-12-31
        array(['2002-01-13T00:00:00.000000000', '2002-01-21T00:00:00.000000000',
               '2002-01-29T00:00:00.000000000', ..., '2010-12-15T00:00:00.000000000',
               '2010-12-23T00:00:00.000000000', '2010-12-31T00:00:00.000000000'],
              dtype='datetime64[ns]')
      • lat
        (lat)
        float64
        36.12 36.38 36.62 ... 43.12 43.38
        array([36.125, 36.375, 36.625, 36.875, 37.125, 37.375, 37.625, 37.875, 38.125,
               38.375, 38.625, 38.875, 39.125, 39.375, 39.625, 39.875, 40.125, 40.375,
               40.625, 40.875, 41.125, 41.375, 41.625, 41.875, 42.125, 42.375, 42.625,
               42.875, 43.125, 43.375])
      • lon
        (lon)
        float64
        -8.875 -8.625 ... 2.875 3.125
        array([-8.875, -8.625, -8.375, -8.125, -7.875, -7.625, -7.375, -7.125, -6.875,
               -6.625, -6.375, -6.125, -5.875, -5.625, -5.375, -5.125, -4.875, -4.625,
               -4.375, -4.125, -3.875, -3.625, -3.375, -3.125, -2.875, -2.625, -2.375,
               -2.125, -1.875, -1.625, -1.375, -1.125, -0.875, -0.625, -0.375, -0.125,
                0.125,  0.375,  0.625,  0.875,  1.125,  1.375,  1.625,  1.875,  2.125,
                2.375,  2.625,  2.875,  3.125])
      • probs
        (time, lat, lon)
        float64
        nan nan nan nan ... nan nan nan nan
        array([[[  nan,   nan,   nan, ...,   nan,   nan,   nan],
                [  nan,   nan,   nan, ...,   nan,   nan,   nan],
                [  nan,   nan,   nan, ...,   nan,   nan,   nan],
                ...,
                [  nan,   nan,   nan, ...,   nan,   nan,   nan],
                [  nan,   nan,   nan, ...,   nan,   nan,   nan],
                [  nan,   nan,   nan, ...,   nan,   nan,   nan]],
        
               [[  nan,   nan,   nan, ...,   nan,   nan,   nan],
                [  nan,   nan,   nan, ...,   nan,   nan,   nan],
                [  nan,   nan,   nan, ...,   nan,   nan,   nan],
                ...,
                [  nan,   nan,   nan, ...,   nan,   nan,   nan],
                [  nan,   nan,   nan, ...,   nan,   nan,   nan],
                [  nan,   nan,   nan, ...,   nan,   nan,   nan]],
        
               [[  nan,   nan,   nan, ...,   nan,   nan,   nan],
                [  nan,   nan,   nan, ...,   nan,   nan,   nan],
                [  nan,   nan,   nan, ...,   nan,   nan,   nan],
                ...,
                [  nan,   nan,   nan, ...,   nan,   nan,   nan],
                [  nan, 0.584,   nan, ...,   nan,   nan,   nan],
                [  nan,   nan,   nan, ...,   nan,   nan,   nan]],
        
               ...,
        
               [[  nan,   nan,   nan, ...,   nan,   nan,   nan],
                [  nan,   nan,   nan, ...,   nan,   nan,   nan],
                [  nan,   nan,   nan, ...,   nan,   nan,   nan],
                ...,
                [  nan,   nan,   nan, ...,   nan,   nan,   nan],
                [  nan, 0.196,   nan, ...,   nan,   nan,   nan],
                [  nan,   nan,   nan, ...,   nan,   nan,   nan]],
        
               [[  nan,   nan,   nan, ...,   nan,   nan,   nan],
                [  nan,   nan,   nan, ...,   nan,   nan,   nan],
                [  nan,   nan,   nan, ...,   nan,   nan,   nan],
                ...,
                [  nan,   nan,   nan, ...,   nan,   nan,   nan],
                [  nan, 0.03 ,   nan, ...,   nan,   nan,   nan],
                [  nan,   nan,   nan, ...,   nan,   nan,   nan]],
        
               [[  nan,   nan,   nan, ...,   nan,   nan,   nan],
                [  nan,   nan,   nan, ...,   nan,   nan,   nan],
                [  nan,   nan,   nan, ...,   nan,   nan,   nan],
                ...,
                [  nan,   nan,   nan, ...,   nan,   nan,   nan],
                [  nan,   nan,   nan, ...,   nan,   nan,   nan],
                [  nan,   nan,   nan, ...,   nan,   nan,   nan]]])
    fig, ax = plt.subplots()
    probs_cubes.probs.mean(dim='time').plot(
        ax=ax,
        vmin=0, robust=True, cmap='Reds', 
        cbar_kwargs={'label': "Probabilities"}
    )
    ax.set(
        xlabel='Longitude',
        ylabel='Latitude'
    )
    plt.tight_layout()
    
    fig, ax = plt.subplots()
    probs_cubes.probs.mean(dim=['lon', 'lat']).plot.line(ax=ax, color='black', linewidth=3)
    ax.set(
        xlabel='Time',
        ylabel='Probabilities'
    )
    ax.legend(['Mean Predictions'])
    plt.tight_layout()
    plt.show()
    
    probs_cubes.probs.groupby('time.month').mean().plot.pcolormesh(
        x='lon', y='lat', 
        col='month', col_wrap=3, 
        vmin=0, robust=True, cmap='Reds',
        cbar_kwargs={
            'label': 'Probabilities', 
            'location':'bottom', 
            'shrink': 0.7, 
            'pad': 0.075
        }
    )
    
    <xarray.plot.facetgrid.FacetGrid at 0x7f8b17bed880>

    Anomalies

    probs_cubes['shannon_info'] = - np.log(probs_cubes.probs) #* np.log(2)
    
    fig, ax = plt.subplots()
    probs_cubes.shannon_info.mean(dim='time').plot(
        ax=ax,
        robust=True, cmap='Reds', 
        cbar_kwargs={'label': "Shannon Information"}
    )
    ax.set(
        xlabel='Longitude',
        ylabel='Latitude',
    )
    plt.tight_layout()
    
    plt.figure(figsize=(10,10))
    probs_cubes.shannon_info.groupby('time.month').mean().plot.pcolormesh(
        x='lon', y='lat', 
        col='month', col_wrap=3, 
        robust=True, cmap='Reds',
        cbar_kwargs={
            'label': 'Shannon Entropy', 
            'location':'bottom', 
            'shrink': 0.7, 
            'pad': 0.075
        }
    )
    
    <xarray.plot.facetgrid.FacetGrid at 0x7f8b17391c10>
    <Figure size 720x720 with 0 Axes>
    # define threshold
    instances = 4
    
    # calculate densities
    
    # calculate density threshold
    

    Experiment Steps