Walkthrough - Gamma in the Distribution Space¶
import sys, os
# insert path to the model directory
cwd = os.getcwd()
path = f"{cwd}/../../src"
sys.path.insert(0, path)
# insert path to the pysim package
pysim_path = f"/home/emmanuel/code/pysim/"
sys.path.insert(0, pysim_path)
import warnings
from tqdm import tqdm
import random
import pandas as pd
import numpy as np
import argparse
from sklearn.utils import check_random_state
# toy datasets
from data.distribution import DataParams
from features.utils import dict_product
# Kernel Dependency measure
from pysim.kernel.hsic import HSIC
from pysim.kernel.utils import estimate_gamma, GammaParam, SigmaParam
# RBIG IT measures
# from models.ite_algorithms import run_rbig_models
from models.dependence import HSICModel
# Plotting
from visualization.distribution import plot_scorer
# experiment helpers
from tqdm import tqdm
# Plotting Procedures
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use(['seaborn-paper'])
%matplotlib inline
warnings.filterwarnings('ignore') # get rid of annoying warnings
%load_ext autoreload
%autoreload 2
FIG_PATH = "/home/emmanuel/projects/2019_hsic_align/results/figures/distribution_experiment/gamma_space/"
RES_PATH = "/home/emmanuel/projects/2019_hsic_align/data/results/distributions/gamma_space/"
Datasets¶
- Samples - [500, 1K, 5K, 10K, 30K, 50K]
- Dimensions - [2, 3, 10, 50, 100]
- Trials - 1:5
- IT measures - Mutual Information
- Distributions - [Gaussian, T-Student]
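The full experimental grid is the cartesian product of these settings. As a rough illustration (the keys and values in the dictionary below are hypothetical, chosen only to mirror the list above, and are not DataParams fields), the grid can be enumerated with the same dict_product helper used later in this notebook:
# enumerate every combination of the experimental settings
# (illustrative sketch; the keys below are hypothetical, not DataParams fields)
from features.utils import dict_product

settings = {
    "samples": [500, 1_000, 5_000, 10_000, 30_000, 50_000],
    "dimensions": [2, 3, 10, 50, 100],
    "trial": [1, 2, 3, 4, 5],
    "distribution": ["gaussian", "t-student"],
}
configs = list(dict_product(settings))
print("# of configurations:", len(configs))  # 6 * 5 * 5 * 2 = 300
print(configs[0])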
Functions¶
# HSIC function
from sklearn.gaussian_process.kernels import RBF

def get_hsic(X, Y, scorer: str, sigma_X, sigma_Y):
    # initialize the HSIC model class
    hsic_model = HSICModel()
    # set the kernel parameters
    hsic_model.kernel_X = RBF(sigma_X)
    hsic_model.kernel_Y = RBF(sigma_Y)
    # compute the HSIC score
    hsic_val = hsic_model.get_score(X, Y, scorer)
    return hsic_val
Example Gaussian Distribution: 2D¶
def plot_data(X):
    # plot the joint distribution of the first two dimensions
    g = sns.jointplot(
        x=X[:, 0],
        y=X[:, 1],
    )
    plt.show()
    return g
Param I: Standardized vs. Non-Standardized Data¶
\bar{x} = \frac{x - \mu_x}{\sigma_x}
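For reference, a minimal numpy sketch of this transform (DataParams presumably applies an equivalent per-feature standardization internally when standardize=True):
import numpy as np

def standardize(X: np.ndarray) -> np.ndarray:
    """Per-feature standardization: subtract the mean, divide by the standard deviation."""
    return (X - X.mean(axis=0)) / X.std(axis=0)

# quick sanity check on random data
rng = np.random.RandomState(123)
X_demo = rng.randn(1000, 2) * 10 + 5
X_bar = standardize(X_demo)
print(X_bar.mean(axis=0).round(6), X_bar.std(axis=0).round(6))  # ~[0, 0] and [1, 1]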
a) Non-Standardized Data¶
# initialize Data Params class
example_params = DataParams(standardize=False)
# generate data from params
inputs_nstd = example_params.generate_data()
g = plot_data(inputs_nstd.X)
g.savefig(f"{FIG_PATH}1_dataX_nstd.png")
g = plot_data(inputs_nstd.Y)
g.savefig(f"{FIG_PATH}1_dataY_nstd.png")
b) Standardized Data¶
# initialize Data Params class
example_params = DataParams(standardize=True)
# generate data from params
inputs_std = example_params.generate_data()
g = plot_data(inputs_std.X)
g.savefig(f"{FIG_PATH}1_dataX_std.png")
g = plot_data(inputs_std.Y)
g.savefig(f"{FIG_PATH}1_dataY_std.png")
HSIC Algorithms¶
- Hilbert-Schmidt Independence Criterion (HSIC)
- Kernel Alignment (KA)
- Centered Kernel Alignment (CKA)
HSIC¶
algorithm path: src/models/dependence.py
2. Estimate HSIC¶
2.1 - Same Length Scale¶
from sklearn.gaussian_process.kernels import RBF

# initialize the sigma estimator
sigma_estimator = SigmaParam(method='median', percent=0.5, per_dimension=False)
# estimate a single sigma per dataset
sigma_X = sigma_estimator.estimate_sigma(X=inputs_std.X)
sigma_Y = sigma_estimator.estimate_sigma(X=inputs_std.Y)
print(f'Sigma_X: {sigma_X}')
print(f'Sigma_Y: {sigma_Y}')
# initialize the HSIC model class
hsic_model = HSICModel()
# set the kernel parameters
hsic_model.kernel_X = RBF(sigma_X)
hsic_model.kernel_Y = RBF(sigma_Y)
# compute the HSIC score
hsic_val = hsic_model.get_score(inputs_std.X, inputs_std.Y, 'hsic')
print(f"HSIC: {hsic_val:.5f}")
2.2 - Different Length Scale¶
from sklearn.gaussian_process.kernels import RBF
# initialize the sigma estimator (one length scale per dimension)
sigma_estimator = SigmaParam(method='median', percent=0.5, per_dimension=True)
# estimate a sigma vector per dataset
sigma_X = sigma_estimator.estimate_sigma(X=inputs_std.X)
sigma_Y = sigma_estimator.estimate_sigma(X=inputs_std.Y)
print(f'Sigma_X: {sigma_X}')
print(f'Sigma_Y: {sigma_Y}')
# initialize the HSIC model class with per-dimension (ARD) kernels
hsic_model = HSICModel(kernel_X=RBF(sigma_X), kernel_Y=RBF(sigma_Y))
# compute the HSIC score
hsic_val = hsic_model.get_score(inputs_std.X, inputs_std.Y, 'hsic')
print(f"HSIC: {hsic_val:.5f}")
Experiment I - Different Scorers¶
We are looking at different "HSIC scorers". They are:
HSIC
HSIC = \frac{1}{n(n-1)}\langle K_xH, K_yH \rangle_F
Notice: we have the centered kernels, K_xH and K_yH, and no normalization.
TKA
TKA = \frac{\langle K_x, K_y \rangle_F}{||K_x||_F\,||K_y||_F}
Notice: we have the uncentered kernels and a normalization factor.
cTKA
cTKA = \frac{\langle K_xH, K_yH \rangle_F}{||K_xH||_F\,||K_yH||_F}
Notice: we have the centered kernels and a normalization factor.
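To make the differences concrete, here is a minimal numpy sketch of the three scores computed directly from two RBF kernel matrices, using the doubly centered kernels HKH. This is only an illustration of the formulas above, not the internal implementation of HSICModel:
import numpy as np
from sklearn.metrics.pairwise import rbf_kernel

def center(K: np.ndarray) -> np.ndarray:
    """Doubly center a kernel matrix: H K H with H = I - (1/n) 1 1^T."""
    n = K.shape[0]
    H = np.eye(n) - np.ones((n, n)) / n
    return H @ K @ H

def hsic_scores(X, Y, gamma_X=1.0, gamma_Y=1.0):
    K_x, K_y = rbf_kernel(X, gamma=gamma_X), rbf_kernel(Y, gamma=gamma_Y)
    K_xc, K_yc = center(K_x), center(K_y)
    n = X.shape[0]
    # HSIC: centered kernels, normalization as in the formula above
    hsic = np.sum(K_xc * K_yc) / (n * (n - 1))
    # TKA: uncentered kernels, normalized by the Frobenius norms
    ka = np.sum(K_x * K_y) / (np.linalg.norm(K_x) * np.linalg.norm(K_y))
    # cTKA: centered kernels, normalized by the Frobenius norms
    cka = np.sum(K_xc * K_yc) / (np.linalg.norm(K_xc) * np.linalg.norm(K_yc))
    return hsic, ka, cka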
# initialize the sigma estimator
sigma_estimator = SigmaParam(method='median', percent=0.5, per_dimension=True)
# estimate sigma for each dataset
sigma_X = sigma_estimator.estimate_sigma(X=inputs_std.X)
sigma_Y = sigma_estimator.estimate_sigma(X=inputs_std.Y)
print(f'Sigma_X: {sigma_X}')
print(f'Sigma_Y: {sigma_Y}')
# initialize the HSIC model class
hsic_model = HSICModel()
# set the kernel parameters
hsic_model.kernel_X = RBF(sigma_X)
hsic_model.kernel_Y = RBF(sigma_Y)
scores = dict()
# loop through the scorers
for iscorer in ['hsic', 'ka', 'cka']:
    # calculate the score
    iscore = hsic_model.get_score(inputs_std.X, inputs_std.Y, iscorer)
    scores[iscorer] = iscore
    print(f'{iscorer.upper()}: {iscore:.4f}')
There is an obvious difference between the three scores even though the underlying distribution has not changed at all. Each method clearly has a slightly different form, so perhaps the differences come from the kernel parameters we have chosen. We need to investigate further.
Experiment II - Gamma Space¶
Method 2 - Multiprocessing¶
from experiments.utils import dict_product, run_parallel_step
# Data Parameters
example_data = DataParams()
example_data.std = 10
inputs = example_data.generate_data()
# experimental parameters
n_grid_points = 20
gamma_grid = np.logspace(-3, 3, n_grid_points)
parameters = {
"scorer": ['hsic', 'ka', 'cka'],
"gamma_X": np.copy(gamma_grid),
"gamma_Y": np.copy(gamma_grid),
}
# create a list of all param combinations
parameters = list(dict_product(parameters))
n_params = len(parameters)
print('# of Params:', n_params)
from typing import Dict

# define the step function executed for each parameter combination
def step(params: Dict, inputs):
    # initialize the HSIC model class
    hsic_model = HSICModel()
    # set the kernel parameters
    hsic_model.gamma_X = params['gamma_X']
    hsic_model.gamma_Y = params['gamma_Y']
    # compute the score
    score = hsic_model.get_score(inputs.X, inputs.Y, params['scorer'])
    # collect the result in a single-row dataframe
    results_df = pd.DataFrame({
        'scorer': [params['scorer']],
        'gamma_X': [params['gamma_X']],
        'gamma_Y': [params['gamma_Y']],
        'score': [score],
    })
    return results_df
verbose = 1
n_jobs = 2
results = run_parallel_step(
exp_step=step,
parameters=parameters,
n_jobs=n_jobs,
verbose=verbose,
inputs=inputs
)
results_full_df = pd.concat(results, ignore_index=True)
results_full_df.tail()
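run_parallel_step is a small helper from experiments.utils; its exact code is not reproduced here, but conceptually it just maps the step function over the parameter list in parallel. A rough sketch of the idea with joblib (the keyword handling is an assumption):
from joblib import Parallel, delayed

def run_parallel_step_sketch(exp_step, parameters, n_jobs=2, verbose=1, **kwargs):
    """Map exp_step over all parameter dicts in parallel (conceptual sketch only)."""
    return Parallel(n_jobs=n_jobs, verbose=verbose)(
        delayed(exp_step)(iparams, **kwargs) for iparams in parameters
    )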
Viz - Heatmap of Values¶
from typing import Optional

def plot_gamma_grid(grid_df: pd.DataFrame, scorer: str, ax: Optional[plt.Axes] = None):
    if ax is None:
        fig, ax = plt.subplots(figsize=(8, 6.5))
    # ===========================================
    # Plot Gridded DataFrame
    # ===========================================
    # subset the chosen scorer
    grid_df_ = grid_df[grid_df['scorer'] == scorer].drop('scorer', axis=1)
    # pivot into a (gamma_Y x gamma_X) grid for the heatmap
    grid_df_ = pd.pivot_table(grid_df_, values='score', index=['gamma_Y'], columns='gamma_X')
    # color scale limits
    if scorer == 'hsic':
        vmax = 0.11
    else:
        vmax = 1.0
    sns.heatmap(
        data=grid_df_,
        xticklabels=grid_df_.columns.values.round(decimals=2),
        yticklabels=grid_df_.index.values.round(decimals=2),
        vmin=0, vmax=vmax, ax=ax
    )
    return plt.gcf(), ax
# # =======================
# # HSIC
# # =======================
# scorer = 'hsic'
# fig, ax = plot_gamma_grid(results_full_df, scorer=scorer)
# ax.set_xlabel(r'$\gamma_X$')
# ax.set_ylabel(r'$\gamma_Y$')
# ax.set_title(f'{scorer.upper()} Score, MI: {inputs.mutual_info:.2f}')
# plt.tight_layout()
# plt.show()
# # =======================
# # Kernel Alignment
# # =======================
# scorer = 'ka'
# fig, ax = plot_gamma_grid(results_full_df, scorer=scorer)
# ax.set_xlabel(r'$\gamma_X$')
# ax.set_ylabel(r'$\gamma_Y$')
# ax.set_title(f'{scorer.upper()} Score, MI: {inputs.mutual_info:.2f}')
# plt.tight_layout()
# plt.show()
# ==========================
# Centered Kernel Alignment
# ==========================
scorer = 'cka'
fig, ax = plot_gamma_grid(results_full_df, scorer=scorer)
# ax.legend(ncol=1, fontsize=15)
ax.set_xlabel(r'$\gamma_X$')
ax.set_ylabel(r'$\gamma_Y$')
ax.set_title(f'{scorer.upper()} Score, MI: {inputs.mutual_info:.2f}')
plt.tight_layout()
plt.show()
Experiment III - Gamma Estimators¶
# Data Parameters
example_data = DataParams()
example_data.std = 10
inputs = example_data.generate_data()
# experimental parameters
gamma_estimators = [
('silverman',None, None),
('scott', None, None),
*[('median', x, None) for x in np.arange(0.1, 1.0, 0.1, dtype=np.float64)]
]
parameters = {
"scorer": ['hsic', 'ka', 'cka'],
"gamma_est": gamma_estimators
}
# create a list of all param combinations
parameters = list(dict_product(parameters))
n_params = len(parameters)
print('# of Params:', n_params)
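For context, Silverman's and Scott's rules are classic kernel-density bandwidth rules of thumb, while 'median' is the pairwise-distance quantile sketched earlier. Below is a rough single-bandwidth version of the two rules of thumb (illustrative only; pysim's estimate_gamma may differ in details such as per-dimension handling and the sigma-to-gamma conversion):
import numpy as np

def rule_of_thumb_sigma(X: np.ndarray, method: str = 'silverman') -> float:
    """Classic KDE bandwidth rules of thumb, averaged into a single sigma."""
    n_samples, n_features = X.shape
    sigma_bar = np.mean(X.std(axis=0))
    if method == 'silverman':
        return sigma_bar * (n_samples * (n_features + 2) / 4.0) ** (-1.0 / (n_features + 4))
    if method == 'scott':
        return sigma_bar * n_samples ** (-1.0 / (n_features + 4))
    raise ValueError(f"Unknown method: {method}")

print(rule_of_thumb_sigma(inputs.X, 'silverman'), rule_of_thumb_sigma(inputs.X, 'scott'))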
from tqdm import tqdm

# initialize the HSIC model class
hsic_model = HSICModel()
# results container
gamma_ests = list()
# run the experiment
for iparams in tqdm(parameters):
    # estimate gamma for X and Y
    gamma_X = estimate_gamma(
        X=inputs.X,
        method=iparams['gamma_est'][0],
        percent=iparams['gamma_est'][1],
        scale=iparams['gamma_est'][2]
    )
    gamma_Y = estimate_gamma(
        X=inputs.Y,
        method=iparams['gamma_est'][0],
        percent=iparams['gamma_est'][1],
        scale=iparams['gamma_est'][2]
    )
    # set the kernel parameters
    hsic_model.gamma_X = gamma_X
    hsic_model.gamma_Y = gamma_Y
    # compute the score
    score = hsic_model.get_score(inputs.X, inputs.Y, iparams['scorer'])
    # append the result as a single-row dataframe
    gamma_ests.append(
        pd.DataFrame({
            'scorer': [iparams['scorer']],
            'gamma_method': [iparams['gamma_est'][0]],
            'gamma_scale': [iparams['gamma_est'][2]],
            'gamma_percent': [iparams['gamma_est'][1]],
            'gamma_X': [gamma_X],
            'gamma_Y': [gamma_Y],
            'score': [score],
        }))
gamma_ests_df = pd.concat(gamma_ests, ignore_index=True)
gamma_ests_df.tail()
from typing import List

def plot_estimated_params(
    ax,
    gamma_est_df: pd.DataFrame,
    gamma_estimators: List,
    scorer: str
):
    # subset the chosen scorer
    df_ = gamma_est_df[gamma_est_df['scorer'] == scorer]
    # plot each gamma estimator as a point in gamma space
    for iestimator in gamma_estimators:
        # subset the estimator (method, percent, scale)
        sub_df = df_[df_['gamma_method'] == iestimator[0]]
        if iestimator[1] is not None:
            sub_df = sub_df[sub_df['gamma_percent'] == iestimator[1]]
        if iestimator[2] is not None:
            sub_df = sub_df[sub_df['gamma_scale'] == iestimator[2]]
        # build a label from the non-None entries
        name = list(filter(None, iestimator))
        name = '_'.join(str(i) for i in name)
        ax.scatter(
            sub_df.gamma_X,
            sub_df.gamma_Y,
            s=500, label=f"{name}", zorder=3, marker='.')
    return ax
# ==========================
# Centered Kernel Alignment
# ==========================
scorer = 'cka'
# Plot Grid
fig, ax = plot_gamma_grid(results_full_df, scorer=scorer)
# Plot points
ax = plot_estimated_params(ax, gamma_ests_df, gamma_estimators, scorer)
ax.legend(ncol=1, fontsize=8)
ax.set_xlabel(r'$\gamma_X$')
ax.set_ylabel(r'$\gamma_Y$')
ax.set_title(f'{scorer.upper()} Score, MI: {inputs.mutual_info:.2f}')
plt.tight_layout()
plt.show()
Viz - Combined Function¶
def plot_dist_params(param_df, gamma_ests_df, gamma_estimators, scorer: str):
    # plot the gamma grid heatmap
    fig, ax = plot_gamma_grid(param_df, scorer=scorer)
    # overlay the estimated gamma values
    ax = plot_estimated_params(ax, gamma_ests_df, gamma_estimators, scorer)
    ax.legend(ncol=1, fontsize=8)
    ax.set_xlabel(r'$\gamma_X$')
    ax.set_ylabel(r'$\gamma_Y$')
    ax.set_title(f'{scorer.upper()} Score, MI: {inputs.mutual_info:.2f}')
    plt.tight_layout()
    plt.show()
plot_dist_params(results_full_df, gamma_ests_df, gamma_estimators, 'hsic')
plot_dist_params(results_full_df, gamma_ests_df, gamma_estimators, 'ka')
plot_dist_params(results_full_df, gamma_ests_df, gamma_estimators, 'cka')