Spatial-Temporal Experiment¶
In this notebook, I will be walking through how we can estimate different methods based on the density cubes that we derive.
import sys
from pyprojroot import here
sys.path.append(str(here()))
import numpy as np
import pathlib
import xarray as xr
import holoviews as hv
import geoviews as gv
import geoviews.feature as gf
from geoviews import opts
from cartopy import crs as ccrs
gv.extension('matplotlib', 'bokeh')
gv.output(size=200)
import sys, os
from pyprojroot import here
root = here(project_files=[".here"])
sys.path.append(str(here()))
import pathlib
# standard python packages
import xarray as xr
import pandas as pd
import numpy as np
#
# Experiment Functions
from src.data.esdc import get_dataset
from src.features import Metrics
from sklearn.preprocessing import StandardScaler
from src.models.density import get_rbig_model
from src.features.temporal import select_period, get_smoke_test_time, TimePeriod
from src.features.spatial import select_region, get_europe, get_spain
from src.models.train_models import get_similarity_scores
from src.experiments.utils import dict_product, run_parallel_step
from src.features import Metrics
from src.features.density import get_density_cubes
from src.features.preprocessing import standardizer_data, get_reference_cube, get_common_indices
from src.models.similarity import cka_coefficient, rv_coefficient, rbig_it_measures
# # esdc tools
# from src.esdc.subset import select_pixel
# from src.esdc.shape import ShapeFileExtract, rasterize
# from esdc.transform import DensityCubes
from typing import List, Dict
import xarray as xr
from tqdm import tqdm
import time
import cartopy
import cartopy.crs as ccrs
# NUMPY SETTINGS
import numpy as onp
onp.set_printoptions(precision=3, suppress=True)
# MATPLOTLIB Settings
import matplotlib as mpl
import matplotlib.pyplot as plt
# %matplotlib inline
# %config InlineBackend.figure_format = 'retina'
# GEOVIEWS
import holoviews as hv
import geoviews as gv
import geoviews.feature as gf
from cartopy import crs
from geoviews import opts
gv.extension('matplotlib')
gv.output(size=150)
# SEABORN SETTINGS
import seaborn as sns
sns.set_context(context='talk',font_scale=0.7)
# sns.set(rc={'figure.figsize': (12, 9.)})
# sns.set_style("whitegrid")
# PANDAS SETTINGS
import pandas as pd
pd.set_option("display.max_rows", 120)
pd.set_option("display.max_columns", 120)
# LOGGING SETTINGS
import sys
import logging
logging.basicConfig(
level=logging.INFO,
stream=sys.stdout,
format='%(asctime)s: %(levelname)s: %(message)s'
)
logger = logging.getLogger()
#logger.setLevel(logging.INFO)
import warnings
warnings.simplefilter("ignore")
%load_ext autoreload
%autoreload 2
## Results
RES_PATH = pathlib.Path(str(here())).joinpath("data/spa_temp/info_earth")
FIG_PATH = pathlib.Path(str(here())).joinpath("reports/figures/spa_temp/demos/infoearth/spain")
!ls $RES_PATH/probs
### Experiment I - Spain, 2010, Cube 1x1x1
For this first experiment, I did something very simple and looked at the density of ESDC as samples.
region = "europe"
period = "2002_2010"
samples = "200000"
dimensions = "111"
variable = 'gpp'
filename = f"{region}_{variable}_{period}_v0_s{samples}_d{dimensions}"
# read csv file
probs_df = pd.read_csv(str(RES_PATH.joinpath(f'probs/{filename}'+".csv")))
# convert to datetime
probs_df['time'] = pd.to_datetime(probs_df['time'])
# create dataframe in the format for xarray
probs_df = probs_df.set_index(['time', 'lat', 'lon']).rename(columns={"0": 'probs'})
# remove probabilities greater than 1
probs_df['probs'][probs_df['probs'] >= 1.0] = np.nan
# create xarray cubes
probs_cubes = xr.Dataset.from_dataframe(probs_df)
# resample monthly
probs_cubes = probs_cubes.resample(time='1MS').mean()
# shannon info
probs_cubes['shannon_info'] = np.log(1 / probs_cubes['probs'])
probs_cubes
### Figure I - Probability Estimates
kdims = ['time', 'lon', 'lat']
vdims = ['shannon_info']
sub_cube = probs_cubes#.resample(time='1MS').mean()
xr_dataset = gv.Dataset(sub_cube.sel(time=slice('2010','2010')), kdims=kdims, vdims=vdims)
qmesh = xr_dataset.to(gv.QuadMesh, ["lon", "lat"])
qmesh.opts(colorbar=True, cmap='Reds')
kdims = ['month', 'lon', 'lat']
vdims = ['probs']
sub_cube = probs_cubes.groupby('time.month').mean()
xr_dataset = gv.Dataset(sub_cube, kdims=kdims, vdims=vdims)
xr_dataset.to(gv.QuadMesh, ["lon", "lat"], dynamic=True) * gf.coastline
# qmesh
# qmesh.opts(colorbar=True, cmap='Blues')
kdims = ['time', 'lon', 'lat']
vdims = ['probs']
sub_cube = probs_cubes#.resample(time='1MS').mean()
xr_dataset = gv.Dataset(sub_cube.sel(time=slice('2010','2010')), kdims=kdims, vdims=vdims)
temp_curve = hv.Curve(xr_dataset.reduce(['lon', 'lat'], np.mean), kdims=['time'])
temp_curve.opts(opts.Curve(aspect=2, xticks=4, xrotation=15))
kdims = ['time', 'lon', 'lat']
vdims = ['probs']
sub_cube = probs_cubes.resample(time='1MS').mean()
xr_dataset = gv.Dataset(sub_cube.sel(time=slice('2010','2010')), kdims=kdims, vdims=vdims)
xr_dataset.to(gv.QuadMesh, ["lon", "lat"])
### Figure II - Shannon Information
kdims = ['time', 'lon', 'lat']
vdims = ['shannon_info']
sub_cube = probs_cubes#.resample(time='1MS').mean()
xr_dataset = gv.Dataset(sub_cube.sel(time=slice('2010','2010')), kdims=kdims, vdims=vdims)
xr_dataset.to(gv.QuadMesh, ["lon", "lat"])
from typing import Optional
def plot_map(xr_data, measure: str, save_name: Optional[str]=None):
fig, ax = plt.subplots()
if measure == 'probs':
xr_data.probs.mean(dim='time').plot(
ax=ax,
vmin=0, robust=True, cmap='Reds',
cbar_kwargs={'label': "Probability"}
)
elif measure == 'info':
xr_data.shannon_info.mean(dim='time').plot(
ax=ax,
vmin=0, robust=True, cmap='Reds',
cbar_kwargs={'label': "Shannon Information"}
)
else:
raise ValueError(f"Unrecognized measure: {measure}")
ax.set(
xlabel='Longitude',
ylabel='Latitude'
)
plt.tight_layout()
if save_name:
fig.savefig(FIG_PATH.joinpath(f"{measure}_{save_name}.png"))
def plot_ts(xr_data, measure: str, save_name: Optional[str]=None):
fig, ax = plt.subplots()
if measure == 'probs':
xr_data.probs.mean(dim=['lon', 'lat']).plot.line(ax=ax, color='black', linewidth=3)
ylabel = 'Probability'
elif measure == 'info':
xr_data.shannon_info.mean(dim=['lon', 'lat']).plot.line(ax=ax, color='black', linewidth=3)
ylabel = 'Shannon Information'
else:
raise ValueError(f"Unrecognized measure: {measure}")
ax.set(
xlabel='Time',
ylabel=ylabel
)
ax.legend(['Mean Predictions'])
plt.tight_layout()
if save_name:
fig.savefig(FIG_PATH.joinpath(f"{measure}_ts_{save_name}.png"))
def plot_ts_error(xr_data, measure: str, save_name: Optional[str]=None):
if measure == 'probs':
predictions = xr_data.probs.mean(dim=['lat','lon'])
std = xr_data.probs.std(dim=['lat','lon'])
ylabel = 'Probabilities'
elif measure == 'info':
predictions = xr_data.shannon_info.mean(dim=['lat','lon'])
std = xr_data.shannon_info.std(dim=['lat','lon'])
ylabel = 'Shannon Information'
else:
raise ValueError(f"Unrecognized measure: {measure}")
fig, ax = plt.subplots()
ax.plot(xr_data.coords['time'].values, predictions)
ax.fill_between(
predictions.coords['time'].values,
predictions - std,
predictions + std,
alpha=0.7, color='orange'
)
ax.set(
xlabel='Time',
ylabel=ylabel,
)
ax.legend(['Mean_predictions'])
plt.tight_layout()
if save_name:
fig.savefig(FIG_PATH.joinpath(f"{measure}_ts_err_{save_name}.png"))
def plot_monthly_map(xr_data, measure: str, save_name: Optional[str]=None):
plt.figure()
xr_data.probs.groupby('time.month').mean().plot.pcolormesh(x='lon', y='lat', col='month', col_wrap=3, vmin=0, robust=True, cmap='Reds')
plt.savefig(FIG_PATH.joinpath(f"monthly_{save_name}.png"))
### Figure I - Probability Maps
plot_map(probs_cubes, 'probs', None)
plot_map(probs_cubes, 'info', None)
### Figure II - Probability Maps, Per Month
plot_monthly_map(probs_cubes, 'probs', None)
plot_monthly_map(probs_cubes, 'info', None)
### Figure 2.1 - Time Series (Probability)
plot_ts(probs_cubes, 'probs', None)
plot_ts(probs_cubes, 'info', None)
### Figure 2.2 - Time Series w. Variance
plot_ts_error(probs_cubes, 'probs', None);
plot_ts_error(probs_cubes, 'info', None)
### Other Plots
### Information
$$
I(\mathbf{X}) = - \log p(\mathbf{X})
$$
probs_cubes['shannon_info'] = - np.log(probs_cubes.probs) #* np.log(2)
fig, ax = plt.subplots()
probs_cubes.shannon_info.mean(dim='time').plot(
ax=ax,
robust=True, cmap='Reds',
cbar_kwargs={'label': "Shannon Information"}
)
ax.set(
xlabel='Longitude',
ylabel='Latitude',
)
plt.tight_layout()
plt.figure(figsize=(10,10))
probs_cubes.shannon_info.groupby('time.month').mean().plot.pcolormesh(x='lon', y='lat', col='month', col_wrap=3, robust=True, cmap='Reds')
## Experiment II - Spain, 2010, Cube 2x2x2
!ls $RES_PATH/probs
probs_df.head()
filename = "spain_gpp_2002_2010_v0_222.csv"
# read csv file
probs_df = pd.read_csv(str(RES_PATH.joinpath(f'probs/{filename}')))
# convert to datetime
probs_df['time'] = pd.to_datetime(probs_df['time'])
# create dataframe in the format for xarray
probs_df = probs_df.set_index(['time', 'lat', 'lon']).rename(columns={"0": 'probs'})
# remove probabilities greater than 1
probs_df['probs'][probs_df['probs'] >= 1.0] = np.nan
# create xarray cubes
probs_cubes = xr.Dataset.from_dataframe(probs_df, )
probs_cubes
fig, ax = plt.subplots()
probs_cubes.probs.mean(dim='time').plot(
ax=ax,
vmin=0, robust=True, cmap='Reds',
cbar_kwargs={'label': "Probabilities"}
)
ax.set(
xlabel='Longitude',
ylabel='Latitude'
)
plt.tight_layout()
fig, ax = plt.subplots()
probs_cubes.probs.mean(dim=['lon', 'lat']).plot.line(ax=ax, color='black', linewidth=3)
ax.set(
xlabel='Time',
ylabel='Probabilities'
)
ax.legend(['Mean Predictions'])
plt.tight_layout()
plt.show()
probs_cubes.probs.groupby('time.month').mean().plot.pcolormesh(
x='lon', y='lat',
col='month', col_wrap=3,
vmin=0, robust=True, cmap='Reds',
cbar_kwargs={
'label': 'Probabilities',
'location':'bottom',
'shrink': 0.7,
'pad': 0.075
}
)
### Anomalies
probs_cubes['shannon_info'] = - np.log(probs_cubes.probs) #* np.log(2)
fig, ax = plt.subplots()
probs_cubes.shannon_info.mean(dim='time').plot(
ax=ax,
robust=True, cmap='Reds',
cbar_kwargs={'label': "Shannon Information"}
)
ax.set(
xlabel='Longitude',
ylabel='Latitude',
)
plt.tight_layout()
plt.figure(figsize=(10,10))
probs_cubes.shannon_info.groupby('time.month').mean().plot.pcolormesh(
x='lon', y='lat',
col='month', col_wrap=3,
robust=True, cmap='Reds',
cbar_kwargs={
'label': 'Shannon Entropy',
'location':'bottom',
'shrink': 0.7,
'pad': 0.075
}
)
# define threshold
instances = 4
# calculate densities
# calculate density threshold
## Experiment Steps