Checking Isotropic Scaling¶
In this notebook, we give a short demonstration of how HSIC suffers under an isotropic scaling factor.
As outlined in this paper, in a nutshell, we would prefer similarity measures that satisfy the following properties:
Invariance to Invertible Linear Transformations
$$s(X, Y) = s(XA, YB)$$
for any full-rank $A$ and $B$.
Invariance to Orthogonal Transformations
$$s(X, Y) = s(XU, YV)$$
where $U^\top U = I$ and $V^\top V = I$.
Invariance to Isotropic Scaling
$$s(X, Y) = s(\alpha X, \beta Y)$$
where $\alpha, \beta \in \mathbb{R}^+$.
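As a quick numerical sanity check of these properties (independent of the project code below), here is a minimal numpy sketch using linear kernels: the biased linear-kernel HSIC changes under isotropic scaling, while its normalized variant (linear cKA) does not, and is also invariant to orthogonal transformations. The helper names here are ours, not the project's.
import numpy as np

def linear_hsic(X, Y):
    # biased HSIC with linear kernels: ||Yc^T Xc||_F^2 / (n - 1)^2
    Xc = X - X.mean(axis=0)
    Yc = Y - Y.mean(axis=0)
    n = X.shape[0]
    return np.linalg.norm(Yc.T @ Xc, 'fro') ** 2 / (n - 1) ** 2

def linear_cka(X, Y):
    # normalized HSIC (centered kernel alignment)
    return linear_hsic(X, Y) / np.sqrt(linear_hsic(X, X) * linear_hsic(Y, Y))

rng = np.random.RandomState(123)
X = rng.randn(100, 3)
Y = X @ rng.randn(3, 2) + 0.1 * rng.randn(100, 2)
Q, _ = np.linalg.qr(rng.randn(3, 3))  # a random orthogonal matrix

print(linear_hsic(10 * X, Y) / linear_hsic(X, Y))           # 100.0 -> HSIC is not scale-invariant
print(np.isclose(linear_cka(10 * X, Y), linear_cka(X, Y)))  # True  -> cKA is scale-invariant
print(np.isclose(linear_cka(X @ Q, Y), linear_cka(X, Y)))   # True  -> cKA is rotation-invariant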
Code¶
import sys, os
import warnings
import tqdm
import random
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
# insert path to the model directory
cwd = os.getcwd()
path = f"{cwd}/../../src"
sys.path.insert(0, path)
# toy datasets
from data.toy import generate_dependence_data
# Kernel Dependency measure
from models.dependence import HSIC, train_rbf_hsic
from models.kernel import estimate_sigma, sigma_to_gamma, gamma_to_sigma, get_param_grid
# RBIG IT measures
from models.ite_algorithms import run_rbig_models
import seaborn as sns
plt.style.use('ggplot')
%matplotlib inline
warnings.filterwarnings('ignore') # get rid of annoying warnings
%load_ext autoreload
%autoreload 2
Example¶
Data¶
def get_data(dataset='sine', num_points=1000, seed=123, noise_x=0.1, noise_y=0.1):
# get dataset
X, Y = generate_dependence_data(
dataset=dataset,
num_points=num_points,
seed=seed,
noise_x=noise_x,
noise_y=noise_y
)
return X, Y
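For example, the helper can be used as follows (the next cell repeats the same steps inline for the demo plot):
# example usage of the helper defined above
X, Y = get_data(dataset='sine', num_points=1000, seed=123)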
# data params
dataset = 'sine'
num_points = 1000
seed = 123
noise_x = 0.1
noise_y = 0.1
alpha = 10
beta = 10
# get dataset
X, Y = generate_dependence_data(
dataset=dataset,
num_points=num_points,
seed=seed,
noise_x=noise_x,
noise_y=noise_y
)
# plot the scaled data
fig, ax = plt.subplots(nrows=1, figsize=(7, 5))
ax.scatter(alpha * X, beta * Y, label=rf'$\alpha$={alpha}, $\beta$={beta}')
plt.legend(fontsize=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.tight_layout()
plt.show()
# make sure the output directory exists before saving
save_path = f'{cwd}/../../results/hsic/figures/demo/'
os.makedirs(save_path, exist_ok=True)
fig.savefig(f"{save_path}demo_{dataset}.png")
Noise in the Data¶
- Linear
- Sinusoidal
- Circle
- Random
Experiment¶
We want a measure that is invariant to orthogonal transformations as well as isotropic scaling. HSIC is invariant to orthogonal transformations but not to isotropic scaling. This can be rectified by simply normalizing the measure, which is what the centered kernel alignment (cKA) does:
$$\text{cKA}(X, Y) = \frac{\text{HSIC}(X, Y)}{\sqrt{\text{HSIC}(X, X)\,\text{HSIC}(Y, Y)}}$$
This notebook gives a short demonstration of how the HSIC value changes under isotropic scaling whereas cKA does not.
Fixed Experimental Parameters
- Dataset - Sinusoidal curve > This is sufficiently nonlinear with some dependencies within the data; complex, but simple to analyze.
- Kernel - RBF > A universal kernel that is quite simple and can potentially model any function.
- Gamma - Silverman estimate from the data > We found in the previous notebook that the Silverman rule works as a decent bandwidth estimator for the data, so we reuse it in this experiment as well.
- n_samples, d_dimensions
Free Experimental Parameters
- HSIC method (HSIC, KA, cKA)
- Scale applied to the data
Helper Functions¶
def get_hsic(X, Y, scorer='hsic', kernel='rbf', gamma=1.0):
# hsic params
subsample = None
bias = True
# initialize HSIC calculator
clf_hsic = HSIC(
gamma=gamma,
kernel=kernel,
scorer=scorer,
subsample=subsample,
bias=bias
)
    # fit the HSIC estimator to the data
    clf_hsic.fit(X, Y)
    # return the score (hsic, tka or ctka, depending on the scorer)
    return clf_hsic.hsic_value
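The bandwidth machinery used below comes from the project's estimate_sigma and sigma_to_gamma (imported above). As a rough sketch of what such helpers typically do, assuming the median heuristic and the RBF parameterization $k(x, y) = \exp(-\gamma \|x - y\|^2)$ with $\gamma = 1/(2\sigma^2)$ (an assumption about, not a copy of, the project's implementation):
from scipy.spatial.distance import pdist

# hypothetical stand-ins for estimate_sigma / sigma_to_gamma
def median_sigma(X):
    # median heuristic: median of the pairwise euclidean distances
    X = np.asarray(X).reshape(len(X), -1)
    return np.median(pdist(X))

def to_gamma(sigma):
    # assumes k(x, y) = exp(-gamma * ||x - y||^2), i.e. gamma = 1 / (2 sigma^2)
    return 1.0 / (2 * sigma ** 2)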
Experiment I - Linear Kernel¶
# Fixed Params (Data)
dataset = 'line'
num_points = 1000
seed = 123
noise_x = 0.1
noise_y = 0.1
beta = 1.0
# Fixed Params (Algorithms)
sigma_est = 'median'
kernel = 'linear'
# Free Params
alphas = np.logspace(-2, 2, 10)
scorers = ['hsic', 'tka', 'ctka']
# collect results as a list of records (pandas' DataFrame.append is deprecated)
results = []
for iscorer in scorers:
for ialpha in alphas:
# generate data
X, Y = generate_dependence_data(
dataset=dataset,
num_points=num_points,
seed=seed,
noise_x=noise_x,
noise_y=noise_y,
)
        # scale X by alpha; beta is fixed to 1
        X *= ialpha
        Y *= beta
# estimate sigma values
init_sigma_X = estimate_sigma(X, method=sigma_est)
init_sigma_Y = estimate_sigma(Y, method=sigma_est)
init_sigma = np.mean([init_sigma_X, init_sigma_Y])
init_gamma = sigma_to_gamma(init_sigma)
# Calculate HSIC value
hsic_val = get_hsic(X, Y, scorer=iscorer, kernel=kernel, gamma=init_gamma)
        # save results
        results.append({
            'scorer': iscorer,
            'hsic': hsic_val,
            'scale': ialpha,
            'gamma': init_gamma,
        })
results_df = pd.DataFrame(results)
results_df.head()
Results¶
def plot_results(results_df, scorer):
# subset dataframe based on scorer
results_df = results_df[results_df['scorer'] == scorer]
# plot results
fig, ax = plt.subplots(ncols=2, figsize=(10, 5))
sns.lineplot(x="scale", y="hsic", data=results_df, ax=ax[0])
sns.lineplot(x="scale", y="gamma", data=results_df, ax=ax[1])
ax[0].set_ylabel(scorer.upper())
ax[1].set_yscale('log')
plt.show()
plot_results(results_df, 'hsic')
plot_results(results_df, 'tka')
plot_results(results_df, 'ctka')
Experiment II - RBF Kernel¶
# Fixed Params (Data)
dataset = 'circ'
num_points = 1000
seed = 123
noise_x = 0.1
noise_y = 0.1
beta = 1.0
# Fixed Params (Algorithms)
sigma_est = 'belkin'
kernel = 'rbf'
# Free Params
alphas = np.logspace(-2, 2, 10)
scorers = ['hsic', 'tka', 'ctka']
# collect results as a list of records
results = []
for iscorer in scorers:
for ialpha in alphas:
# generate data
X, Y = generate_dependence_data(
dataset=dataset,
num_points=num_points,
seed=seed,
noise_x=noise_x,
noise_y=noise_y,
)
        # scale both X and Y by the same factor
        X *= ialpha
        Y *= ialpha
# estimate sigma values
init_sigma_X = estimate_sigma(X, method=sigma_est)
init_sigma_Y = estimate_sigma(Y, method=sigma_est)
init_sigma = np.mean([init_sigma_X, init_sigma_Y])
init_gamma = sigma_to_gamma(init_sigma)
# Calculate HSIC value
hsic_val = get_hsic(X, Y, scorer=iscorer, kernel=kernel, gamma=init_gamma)
        # save results
        results.append({
            'scorer': iscorer,
            'hsic': hsic_val,
            'scale': ialpha,
            'gamma': init_gamma,
        })
results_df = pd.DataFrame(results)
plot_results(results_df, 'hsic')
plot_results(results_df, 'tka')
plot_results(results_df, 'ctka')
Experiment III - Optimized RBF Gamma¶
# Fixed Params (Data)
dataset = 'circ'
num_points = 1000
seed = 123
noise_x = 0.1
noise_y = 0.1
beta = 1.0
# Fixed Params (Algorithms)
sigma_est = 'belkin'
kernel = 'rbf'
n_gamma = 50
factor = 2
n_jobs = -1
cv = 2
# Free Params
alphas = np.logspace(-2, 2, 10)
scorers = ['hsic', 'tka', 'ctka']
# collect results as a list of records
results = []
for iscorer in scorers:
for ialpha in alphas:
# generate data
X, Y = generate_dependence_data(
dataset=dataset,
num_points=num_points,
seed=seed,
noise_x=noise_x,
noise_y=noise_y,
)
        # scale both X and Y by the same factor
        X *= ialpha
        Y *= ialpha
# estimate sigma values
init_sigma_X = estimate_sigma(X, method=sigma_est)
init_sigma_Y = estimate_sigma(Y, method=sigma_est)
init_sigma = np.mean([init_sigma_X, init_sigma_Y])
init_gamma = sigma_to_gamma(init_sigma)
# Calculate HSIC value
clf_hsic = train_rbf_hsic(
X, Y,
scorer=iscorer,
n_gamma=n_gamma,
factor=factor,
sigma_est=sigma_est,
)
hsic_val = clf_hsic.score(X)
        # save results (note: gamma here is the initial estimate, not the optimized one)
        results.append({
            'scorer': iscorer,
            'hsic': hsic_val,
            'scale': ialpha,
            'gamma': init_gamma,
        })
results_df = pd.DataFrame(results)
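For reference, the gamma grid that train_rbf_hsic searches over (controlled by n_gamma and factor) might look roughly like the sketch below; this is a guess at the shape of the project's get_param_grid, not its actual implementation:
# hypothetical sketch of a log-spaced gamma search grid
def make_gamma_grid(init_gamma, n_gamma=50, factor=2):
    # span `factor` orders of magnitude on each side of the initial estimate
    return np.logspace(
        np.log10(init_gamma) - factor,
        np.log10(init_gamma) + factor,
        n_gamma,
    )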
Results¶
plot_results(results_df, 'hsic')
plot_results(results_df, 'tka')
plot_results(results_df, 'ctka')