Results - Prelim. Spa-Temp Experiment¶
In this notebook, I will be walking through how we can estimate different methods based on the density cubes that we derive.
# --- Environment setup: project root, experiment helpers, plotting & logging ---
import sys, os
from pyprojroot import here
# Locate the project root (marked by a ".here" file) and put it on sys.path
# so the `src` package below resolves.
root = here(project_files=[".here"])
sys.path.append(str(here()))
import pathlib
# standard python packages
import xarray as xr
import pandas as pd
import numpy as np
#
# Experiment Functions
from src.data.esdc import get_dataset
from src.features import Metrics
from src.features.temporal import select_period, get_smoke_test_time
from src.features.spatial import select_region, get_europe
from src.models.train_models import get_similarity_scores
from src.experiments.utils import dict_product, run_parallel_step
from src.features import Metrics
from src.features.density import get_density_cubes
from src.features.preprocessing import standardizer_data, get_reference_cube, get_common_indices
from src.models.similarity import cka_coefficient, rv_coefficient, rbig_it_measures
# # esdc tools
# from src.esdc.subset import select_pixel
# from src.esdc.shape import ShapeFileExtract, rasterize
# from esdc.transform import DensityCubes
from typing import List, Dict
# NOTE(review): xarray, pandas, sys and Metrics are each imported twice in
# this cell — harmless, but could be deduplicated.
import xarray as xr
from tqdm import tqdm
import cartopy
import cartopy.crs as ccrs
# NUMPY SETTINGS
import numpy as onp
onp.set_printoptions(precision=3, suppress=True)
# MATPLOTLIB Settings
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
# SEABORN SETTINGS
import seaborn as sns
sns.set_context(context='talk',font_scale=0.7)
# sns.set(rc={'figure.figsize': (12, 9.)})
# sns.set_style("whitegrid")
# PANDAS SETTINGS
import pandas as pd
pd.set_option("display.max_rows", 120)
pd.set_option("display.max_columns", 120)
# LOGGING SETTINGS
import sys
import logging
logging.basicConfig(
    level=logging.INFO,
    stream=sys.stdout,
    format='%(asctime)s:%(levelname)s:%(message)s'
)
logger = logging.getLogger()
#logger.setLevel(logging.INFO)
%load_ext autoreload
%autoreload 2
Results Plot¶
For this first set of results, I'm looking at the following:
- Entropy of the different spatial-temporal density cube compositions.
- MI between the plain samples with no spatial-temporal features and the density cubes with different spatial-temporal compositions.
Hypothesis
- Less mutual information with more features
- I'm not sure about entropy.
# Inspect which result files exist for the trial experiment.
!ls /home/emmanuel/projects/2020_rbig_rs/data/spa_temp/trial_experiment/
# Path to the v4 LST-Spain results CSV, relative to the project root.
RES_PATH = pathlib.Path(str(root)).joinpath(
    "data/spa_temp/trial_experiment/v4_lst_spain.csv"
)
# Load experiment results; one row per spatial/temporal cube configuration.
results = pd.read_csv(str(RES_PATH))
# results['ratio'] = results['spatial'] / results['temporal']
results.tail()
Entropy¶
# Raw RBIG entropy vs the spatial/temporal ratio, colored by total
# dimensionality (left) and by temporal window size (right).
sub_df = results.copy()
sub_df['ratio'] = sub_df['spatial'] / sub_df['temporal']
# Treat the grid parameters as categoricals so seaborn uses discrete hues.
for col in ("spatial", "temporal", "n_dimensions"):
    sub_df[col] = sub_df[col].astype('category')
fig, ax = plt.subplots(ncols=2, figsize=(10, 5))
sns.scatterplot(x='ratio', y='rbig_H_y', hue='n_dimensions', data=sub_df, ax=ax[0], label='Entropy',)
sns.scatterplot(x='ratio', y='rbig_H_y', hue='temporal', data=sub_df, ax=ax[1], label='Entropy',)
ax[0].set_xscale('log')
ax[1].set_xscale('log')
ax[0].set_xlabel('Spatial / Temporal Ratio')
ax[1].set_xlabel('Spatial / Temporal Ratio')
ax[0].set_ylabel('Entropy')
ax[1].set_ylabel('')
plt.show()
Normalized Entropy¶
Normalization by Features¶
So firstly, we can normalize by the features. This will allow us to see a more relative impact on the entropy measures. We know there will be a change in entropy with more features, but it would be nice to see the relative impact. So we can do this by dividing by the number of features.
$$ \tilde{\text{H}}(X) = \frac{1}{n_{\text{features}}}H(X) $$
# Entropy normalized by the number of density-cube features:
# n_features = spatial^2 + temporal, so H_norm = H / n_features.
sub_df = results.copy()
sub_df['dimensions'] = sub_df['spatial'] ** 2 + sub_df['temporal']
sub_df['H_y_norm'] = sub_df['rbig_H_y'] / sub_df['dimensions']
# Plotting columns: ratio on x, grid parameters as discrete hues.
sub_df['ratio'] = sub_df['spatial'] / sub_df['temporal']
for col in ("spatial", "temporal"):
    sub_df[col] = sub_df[col].astype('category')
fig, ax = plt.subplots(ncols=2, figsize=(10, 5))
sns.scatterplot(x='ratio', y='H_y_norm', hue='spatial', data=sub_df, ax=ax[0], label='Entropy',)
sns.scatterplot(x='ratio', y='H_y_norm', hue='temporal', data=sub_df, ax=ax[1], label='Entropy',)
ax[0].set_xscale('log')
ax[0].set_xlabel('Spatial / Temporal Ratio')
ax[0].set_ylabel('Normalized Entropy (via Features)')
ax[1].set_xscale('log')
ax[1].set_xlabel('Spatial / Temporal Ratio')
ax[1].set_ylabel('')
plt.show()
Method I - Upper Bound¶
The easiest way I found is to divide by the upper bound. So:
$$ \tilde{H}(X) = \frac{1}{\log N} H(X) $$
# Normalize entropy by its theoretical upper bound log(N).
sub_df = results.copy()
# get number of samples
# NOTE(review): this is the number of ROWS in the results table (one per
# experiment configuration), not the number of samples used to estimate
# each entropy — confirm this is the intended N in H(X)/log(N).
n_samples = sub_df['rbig_H_y'].shape[0]
sub_df['ratio'] = sub_df['spatial'] / sub_df['temporal']
# NOTE(review): 'dimensions' is computed here but never used in this cell.
sub_df['dimensions'] = sub_df['spatial'] ** 2 + sub_df['temporal']
# NOTE(review): mixed log bases — numerator is multiplied by log(2) while
# the denominator uses log2(n_samples); the markdown formula above uses a
# plain log N. Verify the intended base before trusting these values.
sub_df['H_y_norm'] = (sub_df['rbig_H_y'] * np.log(2)) / np.log2(n_samples) #* np.log(2)
sub_df["spatial"] = sub_df["spatial"].astype('category')
sub_df["temporal"] = sub_df["temporal"].astype('category')
fig, ax = plt.subplots(ncols=2, figsize=(10, 5))
sns.scatterplot(x='ratio', y='H_y_norm', hue='spatial', data=sub_df, ax=ax[0], label='Entropy',)
sns.scatterplot(x='ratio', y='H_y_norm', hue='temporal', data=sub_df, ax=ax[1], label='Entropy',)
# sns.scatterplot(x='ratio', y='rv_coeff', data=sub_df, ax=ax, label='RV')
ax[0].set_xscale('log'), ax[1].set_xscale('log')
ax[0].set_yscale('symlog'), ax[1].set_yscale('symlog')
ax[0].set_xlabel('Spatial / Temporal Ratio'), ax[1].set_xlabel('Spatial / Temporal Ratio')
ax[0].set_ylabel('Normalized Entropy'), ax[1].set_ylabel('')
plt.show()
Method II - Rescaling via the min/max¶
So for this more involved technique...we can try to rescale the entropy measures based on their joint min-max values.
I feel like this method is more for visualization if anything. So the procedure would be as follows:
- Find the min of H_X,H_Y
- Find the max of H_X,H_Y
- Rescale by the min,max values to be between 0,1
# Rescale the entropies onto [0, 1] using the joint min/max over both
# the X and Y entropy columns (so the two share one common scale).
sub_df = results.copy()
min_H = sub_df[['rbig_H_y', 'rbig_H_x']].min().min()
max_H = sub_df[['rbig_H_y', 'rbig_H_x']].max().max()

def rescale(x, min_value, max_value):
    """Linearly map x onto [0, 1] given the joint min/max."""
    return (x - min_value) / (max_value - min_value)

# Vectorized over the whole Series (same per-element arithmetic as apply).
sub_df['H_y_norm'] = rescale(sub_df['rbig_H_y'], min_H, max_H)
sub_df['H_x_norm'] = rescale(sub_df['rbig_H_x'], min_H, max_H)
# Plotting columns.
sub_df['ratio'] = sub_df['spatial'] / sub_df['temporal']
for col in ("spatial", "temporal"):
    sub_df[col] = sub_df[col].astype('category')
fig, ax = plt.subplots(ncols=2, figsize=(10, 5))
sns.scatterplot(x='ratio', y='H_y_norm', hue='spatial', data=sub_df, ax=ax[0], label='Entropy',)
sns.scatterplot(x='ratio', y='H_y_norm', hue='temporal', data=sub_df, ax=ax[1], label='Entropy',)
ax[0].set_xscale('log')
ax[0].set_xlabel('Spatial / Temporal Ratio')
ax[0].set_ylabel('Normalized Entropy')
ax[1].set_xscale('log')
ax[1].set_xlabel('Spatial / Temporal Ratio')
ax[1].set_ylabel('')
plt.show()
Method - Softplus¶
$$ \log(1 + \exp(-x)) $$
# Softplus transform of the entropies: softplus(x) = log(1 + exp(x)),
# which maps the (possibly negative) entropies onto (0, inf).
sub_df = results.copy()
# find the min,max values (kept for reference; not used in this cell)
min_H = sub_df[['rbig_H_y', 'rbig_H_x']].min().min()
max_H = sub_df[['rbig_H_y', 'rbig_H_x']].max().max()

def softplus(x):
    """Naive softplus; exp(x) overflows to inf for large positive x."""
    return np.log1p(np.exp(x))

def safe_softplus(x):
    """Numerically stable softplus.

    Uses the identity softplus(x) = max(x, 0) + log1p(exp(-|x|)), which
    never exponentiates a large positive argument.
    """
    return x * (x >= 0) + np.log1p(np.exp(-np.abs(x)))

# BUG FIX: the stable variant was defined but the overflow-prone one was
# applied; use safe_softplus so large entropies don't produce inf.
sub_df['H_y_softplus'] = sub_df['rbig_H_y'].apply(safe_softplus)
sub_df['H_x_softplus'] = sub_df['rbig_H_x'].apply(safe_softplus)
# Plot stuff
sub_df['ratio'] = sub_df['spatial'] / sub_df['temporal']
sub_df["spatial"] = sub_df["spatial"].astype('category')
sub_df["temporal"] = sub_df["temporal"].astype('category')
fig, ax = plt.subplots(ncols=2, figsize=(10, 5))
sns.scatterplot(x='ratio', y='H_y_softplus', hue='spatial', data=sub_df, ax=ax[0], label='Entropy',)
sns.scatterplot(x='ratio', y='H_y_softplus', hue='temporal', data=sub_df, ax=ax[1], label='Entropy',)
ax[0].set(
    xscale='log',
    xlabel='Spatial / Temporal Ratio',
    ylabel='Entropy (Softplus)'
)
ax[1].set(
    xscale='log',
    xlabel='Spatial / Temporal Ratio',
    ylabel=''
)
plt.show()
Mutual Information¶
# Raw RBIG mutual information vs the spatial/temporal ratio.
sub_df = results.copy()
sub_df['ratio'] = sub_df['spatial'] / sub_df['temporal']
for col in ("spatial", "temporal"):
    sub_df[col] = sub_df[col].astype('category')
fig, ax = plt.subplots(ncols=2, figsize=(10, 5))
sns.scatterplot(x='ratio', y='rbig_I_xy', data=sub_df, hue='spatial', ax=ax[0])
sns.scatterplot(x='ratio', y='rbig_I_xy', data=sub_df, hue='temporal', ax=ax[1])
ax[0].set(xscale='log',
          ylabel='Mutual Information',
          xlabel='Spatial / Temporal Ratio'
          )
ax[1].set(xscale='log',
          ylabel='',
          xlabel='Spatial / Temporal Ratio'
          )
Normalized Mutual Information¶
Method I - Naive¶
There is a problem because the entropy has a lot of negative values. So we're going to rescale the entropy such that it is above 0. However...I'm not sure what to do about the MI because I feel like we should definitely rescale that as well.
Some ideas given from this paper [pg 740, eq. 6] include:
which takes inspiration from the equation from [Cover and Thomas, 1991].
and they essentially solved this equation for $\rho$.
# "Naive" normalized MI: nI = sqrt(1 - exp(-2 I)), i.e. the Gaussian
# relation I = -0.5 log(1 - rho^2) solved for rho (see text above).
# NaN is produced wherever rbig_I_xy is negative (1 - exp(-2I) < 0).
sub_df = results.copy()
sub_df['ratio'] = sub_df['spatial'] / sub_df['temporal']
for col in ("spatial", "temporal"):
    sub_df[col] = sub_df[col].astype('category')
sub_df['nI2'] = np.sqrt( 1 - np.exp(-2 * sub_df['rbig_I_xy']))
fig, ax = plt.subplots(ncols=2, figsize=(10, 5))
sns.scatterplot(x='ratio', y='nI2', data=sub_df, hue='spatial', ax=ax[0], label='nMI')
sns.scatterplot(x='ratio', y='nI2', data=sub_df, hue='temporal', ax=ax[1], label='nMI')
ax[0].set_xscale('log')
ax[0].set_ylabel('Normalized Mutual Information')
ax[0].set_xlabel('Spatial / Temporal Ratio')
ax[1].set_xscale('log')
ax[1].set_ylabel('')
ax[1].set_xlabel('Spatial / Temporal Ratio')
Update: I guess another interpretation is that, technically, I'm seeing more features and thus there are more realizations (with perturbations) of my observations. So maybe there should be more mutual information. Especially if it's redundant.
What I don't like is that we're not using the entropy measures.
MI - Max¶
# Normalized MI via the maximum entropy: nI = I(X;Y) / max(H).
sub_df = results.copy()
sub_df['ratio'] = sub_df['spatial'] / sub_df['temporal']
for col in ("spatial", "temporal"):
    sub_df[col] = sub_df[col].astype('category')
# Global maximum over both entropy columns.
max_H = sub_df[['rbig_H_x','rbig_H_y']].max().max()
print(max_H)
sub_df['nI2'] = sub_df['rbig_I_xy'] / max_H
fig, ax = plt.subplots(ncols=2, figsize=(10, 5))
sns.scatterplot(x='ratio', y='nI2', data=sub_df, hue='spatial', ax=ax[0])
sns.scatterplot(x='ratio', y='nI2', data=sub_df, hue='temporal', ax=ax[1])
ax[0].set(xscale='log',
          ylabel='Normalized Mutual Information',
          xlabel='Spatial / Temporal Ratio'
          )
ax[1].set(xscale='log',
          ylabel='',
          xlabel='Spatial / Temporal Ratio'
          )
MI (Joint)¶
# Normalized MI via the joint entropy: nI = I(X;Y) / H(X,Y),
# where H(X,Y) = H(X) + H(Y) - I(X;Y).
sub_df = results.copy()
# Plot stuff
sub_df['ratio'] = sub_df['spatial'] / sub_df['temporal']
sub_df["spatial"] = sub_df["spatial"].astype('category')
sub_df["temporal"] = sub_df["temporal"].astype('category')
sub_df['H_xy'] = sub_df['rbig_H_x'] + sub_df['rbig_H_y'] - sub_df['rbig_I_xy']
# FIX: removed a stray `print(max_H)` — max_H was a leftover variable from
# the previous cell and is unrelated to this cell's computation.
sub_df['nI2'] = sub_df['rbig_I_xy'] / sub_df['H_xy']
fig, ax = plt.subplots(ncols=2, figsize=(10, 5))
sns.scatterplot(x='ratio', y='nI2', data=sub_df, hue='spatial', ax=ax[0])
sns.scatterplot(x='ratio', y='nI2', data=sub_df, hue='temporal', ax=ax[1])
ax[0].set_xscale('log')
ax[0].set_ylabel('Normalized Mutual Information')
ax[0].set_xlabel('Spatial / Temporal Ratio')
ax[1].set_xscale('log')
ax[1].set_ylabel('')
ax[1].set_xlabel('Spatial / Temporal Ratio')
Method¶
# Normalized MI scaled by H(X,Y) / (H(X) * H(Y)).
# NOTE: later VI cells reuse this sub_df (rbig_* columns, ratio,
# categorical spatial/temporal), so those columns are all kept.
sub_df = results.copy()
# Joint entropy: H(X,Y) = H(X) + H(Y) - I(X;Y).
sub_df['H_xy'] = sub_df['rbig_H_x'] + sub_df['rbig_H_y'] - sub_df['rbig_I_xy']
factor = sub_df['H_xy'] / sub_df['rbig_H_x'] / sub_df['rbig_H_y']
sub_df['nI2'] = sub_df['rbig_I_xy'] * factor
# Plotting columns.
sub_df['ratio'] = sub_df['spatial'] / sub_df['temporal']
for col in ("spatial", "temporal"):
    sub_df[col] = sub_df[col].astype('category')
fig, ax = plt.subplots(ncols=2, figsize=(10, 5))
sns.scatterplot(x='ratio', y='nI2', data=sub_df, hue='spatial', ax=ax[0])
sns.scatterplot(x='ratio', y='nI2', data=sub_df, hue='temporal', ax=ax[1])
ax[0].set_xscale('log')
ax[0].set_ylabel('Normalized Mutual Information')
ax[0].set_xlabel('Spatial / Temporal Ratio')
ax[1].set_xscale('log')
ax[1].set_ylabel('')
ax[1].set_xlabel('Spatial / Temporal Ratio')
Variation of Information¶
There are a few ways to calculate VI and I will outline how we can calculate them and their normalized variants.
Method I - Variation of Information¶
Firstly, the VI measure was developed by Meila, 2007 [pg 879, eq 19] and is written as:
To get the normalized variant, we can use the measure developed by Meila, 2007 [pg 886, sec 5.1], which is called the Adjusted VI distance. Knowing that the VI measure is upper bounded by $\log N$ (Meila, 2007 [pg 881, eq. 24]), they simply divide the VI measure by $\log N$ to get a measure between 0 and 1. So we get
Note: In theory, this should be bounded between 0 and 1 but I'm not sure what happens with negative values...
# Variation of Information and its log(N)-normalized variant.
# Uses sub_df from the previous cell (rbig_* columns, ratio, categories).
# get number of samples (rows in the results table)
n_samples = sub_df['rbig_H_x'].shape[0]
# VI(X;Y) = H(X) + H(Y) - 2 I(X;Y)
sub_df['vi_joint'] = sub_df['rbig_H_x'] + sub_df['rbig_H_y'] - 2 * sub_df['rbig_I_xy']
# Adjusted VI: divide by the log(N) upper bound (Meila 2007, eq. 24).
sub_df['vi_joint_norm'] = (sub_df['vi_joint']) / np.log(n_samples)
# Plot Unnormalized
fig, ax = plt.subplots(ncols=2, figsize=(10, 5))
sns.scatterplot(x='ratio', y='vi_joint', data=sub_df, hue='spatial', ax=ax[0], label='nMI')
sns.scatterplot(x='ratio', y='vi_joint', data=sub_df, hue='temporal', ax=ax[1], label='nMI')
ax[0].set_yscale('symlog')
ax[0].set_xscale('log')
# FIX: label said "Meila 2005"; the citation in the text is Meila 2007.
ax[0].set_ylabel('VI (Meila 2007)')
ax[0].set_xlabel('Spatial / Temporal Ratio')
ax[1].set_yscale('symlog')
ax[1].set_xscale('log')
ax[1].set_ylabel('')
ax[1].set_xlabel('Spatial / Temporal Ratio')
# Plot Normalized
fig, ax = plt.subplots(ncols=2, figsize=(10, 5))
sns.scatterplot(x='ratio', y='vi_joint_norm', data=sub_df, hue='spatial', ax=ax[0], label='nMI')
sns.scatterplot(x='ratio', y='vi_joint_norm', data=sub_df, hue='temporal', ax=ax[1], label='nMI')
ax[0].set_yscale('symlog')
ax[0].set_xscale('log')
ax[0].set_ylabel('Normalized VI (Meila 2007)')
ax[0].set_xlabel('Spatial / Temporal Ratio')
ax[1].set_yscale('symlog')
ax[1].set_xscale('log')
ax[1].set_ylabel('')
ax[1].set_xlabel('Spatial / Temporal Ratio')
Method III - Variation of Information (Joint)¶
This is analogous to the measure above but the way they do the normalization is different. This one I found in this paper [pg. 2842, table 3].
Note: Below I show how the quantity on the right is equivalent to the middle term:
$$ \begin{aligned} 1 - \frac{I(X;Y)}{H(X,Y)} &= 1 - \frac{I(X;Y)}{H(X) + H(Y) - I(X;Y)} \\ &= \frac{H(X) + H(Y) - I(X;Y)}{H(X) + H(Y) - I(X;Y)} - \frac{I(X;Y)}{H(X) + H(Y) - I(X;Y)} \\ &= \frac{H(X) + H(Y) - 2I(X;Y)}{H(X) + H(Y) - I(X;Y)} \\ &= \frac{VI(X;Y)}{H(X,Y)} \end{aligned} $$
# Joint-entropy-normalized VI: nVI = VI(X;Y) / H(X,Y) = 1 - I/H(X,Y).
# get number of samples
# NOTE(review): n_samples is recomputed here but unused in this cell.
n_samples = sub_df['rbig_H_x'].shape[0]
# calculate variation of information
sub_df['vi'] = sub_df['rbig_H_x'] + sub_df['rbig_H_y'] - 2 * sub_df['rbig_I_xy']
sub_df['H_xy'] = sub_df['rbig_H_x'] + sub_df['rbig_H_y'] - sub_df['rbig_I_xy']
sub_df['nVI_joint'] = sub_df['vi'] / sub_df['H_xy']
# sub_df['d_joint'] = 1 - (sub_df['rbig_I_xy'] / sub_df['H_xy'])
fig, ax = plt.subplots(ncols=2, figsize=(10, 5))
sns.scatterplot(x='ratio', y='nVI_joint', data=sub_df, hue='spatial', ax=ax[0], label='nMI')
sns.scatterplot(x='ratio', y='nVI_joint', data=sub_df, hue='temporal', ax=ax[1], label='nMI')
ax[0].set_yscale('log')
ax[0].set_xscale('log')
# FIX: label said "Kaskov"; the measure is from Kraskov et al. 2005.
ax[0].set_ylabel('nVI (Joint) (Kraskov 2005)')
ax[0].set_xlabel('Spatial / Temporal Ratio')
ax[1].set_yscale('log')
ax[1].set_xscale('log')
ax[1].set_ylabel('')
ax[1].set_xlabel('Spatial / Temporal Ratio')
Furthermore, I didn't estimate the joint entropy H(X,Y) directly with RBIG. So we could be introducing a lot of errors by doing all of the manipulations to acquire the joint entropy.
Method III - Variation of Information (Max)¶
This is the last variation of the VI method found in this paper [pg. 2842, table 3].
And apparently this is the tightest bound on the VI measure. It is also known as the Normalized Information Distance.
# Normalized Information Distance: nVI = 1 - I(X;Y) / max(H(X), H(Y)),
# computed elementwise per row.
sub_df['nVI_max'] = 1 - (sub_df['rbig_I_xy'] / np.maximum(sub_df['rbig_H_x'], sub_df['rbig_H_y']))
fig, ax = plt.subplots(ncols=2, figsize=(10, 5))
sns.scatterplot(x='ratio', y='nVI_max', data=sub_df, hue='spatial', ax=ax[0], label='nMI')
sns.scatterplot(x='ratio', y='nVI_max', data=sub_df, hue='temporal', ax=ax[1], label='nMI')
ax[0].set(
    xscale='log',
    # FIX: label said "Kaskov"; the measure is from Kraskov et al. 2005.
    ylabel='nVI (Max) (Kraskov 2005)',
    xlabel='Spatial / Temporal Ratio'
)
ax[1].set(
    xscale='log',
    ylabel='',
    xlabel='Spatial / Temporal Ratio'
)
plt.tight_layout()
plt.show()
Now, later, I will have to figure out how this ties into the Taylor Diagram. But for now, I'm quite happy.
from sklearn.preprocessing import MinMaxScaler
# find the min,max of all entropy values
# NOTE(review): min_H is computed but never used below; only max_H is.
min_H = sub_df[['rbig_H_x', 'rbig_H_y']].min().min()
max_H = sub_df[['rbig_H_x', 'rbig_H_y']].max().max()
# Scale between 0 and the max value?
# NOTE(review): each column is rescaled onto [0, max_H] independently of
# the other, so the two entropies no longer share a common scale — confirm
# this is intended.
sub_df['rbig_H_xs'] = MinMaxScaler((0, max_H)).fit_transform(sub_df['rbig_H_x'].values[:, None])
sub_df['rbig_H_ys'] = MinMaxScaler((0, max_H)).fit_transform(sub_df['rbig_H_y'].values[:, None])
# calculate normalized MI with the scaled versions
# NOTE(review): the row holding each column's minimum scales to exactly 0,
# so this division yields inf/nan for that row.
sub_df['d_max'] = (sub_df['rbig_I_xy'] / np.sqrt(sub_df['rbig_H_xs']) / np.sqrt(sub_df['rbig_H_ys'])) #* np.log(2)
# 2x2 grid: top row = scaled normalized MI, bottom row = RV coefficient.
fig, ax = plt.subplots(ncols=2, nrows=2, figsize=(10, 10))
sns.scatterplot(x='ratio', y='d_max', data=sub_df, hue='spatial', ax=ax[0,0], label='nMI')
sns.scatterplot(x='ratio', y='d_max', data=sub_df, hue='temporal', ax=ax[0,1], label='nMI')
# sns.scatterplot(x='ratio', y='rv_coeff', data=sub_df, ax=ax[1], label='RV')
# ax[0,0].set_yscale('log')
ax[0,0].set_xscale('log')
ax[0,0].set_ylabel('Normalized Mutual Information')
ax[0,0].set_xlabel('Spatial / Temporal Ratio')
# ax[0, 1].set_yscale('log')
ax[0, 1].set_xscale('log')
ax[0, 1].set_ylabel('')
ax[0, 1].set_xlabel('Spatial / Temporal Ratio')
sns.scatterplot(x='ratio', y='rv_coeff', data=sub_df, hue='spatial', ax=ax[1,0], label='nMI')
sns.scatterplot(x='ratio', y='rv_coeff', data=sub_df, hue='temporal', ax=ax[1,1], label='nMI')
# sns.scatterplot(x='ratio', y='rv_coeff', data=sub_df, ax=ax[1], label='RV')
ax[1,0].set_yscale('log')
ax[1,0].set_xscale('log')
ax[1,0].set_ylabel('Coefficient')
ax[1,0].set_xlabel('Spatial / Temporal Ratio')
ax[1, 1].set_yscale('log')
ax[1, 1].set_xscale('log')
ax[1, 1].set_ylabel('')
ax[1, 1].set_xlabel('Spatial / Temporal Ratio')
plt.show()
So right away, we see that the RV Coefficient doesn't fluctuate at all (or at least it is rather random and I don't see a clear pattern).
Mutual Information
MI clearly has a pattern. That's good, it's picking up something about the feature representations. The only problem is it's difficult to interpret because it's not really between 0 and 1, so I'm not sure how we're supposed to use this in a Taylor Diagram.