Checking Isotropic Scaling¶
In this notebook, we give a short demonstration of how HSIC suffers under an isotropic scaling factor.
As outlined in this paper, in a nutshell, we would prefer similarity measures that satisfy the following properties:
Invariance to Invertible Linear Transformations
$$s(X, Y) = s(XA, YB)$$
for any full-rank $A$ and $B$.
Invariance to Orthogonal Transformations
$$s(X, Y) = s(XU, YV)$$
where $U^\top U = I$ and $V^\top V = I$.
Invariance to Isotropic Scaling
$$s(X, Y) = s(\alpha X, \beta Y)$$
where $\alpha, \beta \in \mathbb{R}^+$.
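As a quick numerical sanity check of these properties (independent of the project code below), here is a minimal numpy sketch using linear kernels: the biased linear-kernel HSIC changes under isotropic scaling, while its normalized variant (linear cKA) does not, and is also invariant to orthogonal transformations. The helper names here are ours, not the project's.
import numpy as np

def linear_hsic(X, Y):
    # biased HSIC with linear kernels: ||Yc^T Xc||_F^2 / (n - 1)^2
    Xc = X - X.mean(axis=0)
    Yc = Y - Y.mean(axis=0)
    n = X.shape[0]
    return np.linalg.norm(Yc.T @ Xc, 'fro') ** 2 / (n - 1) ** 2

def linear_cka(X, Y):
    # normalized HSIC (centered kernel alignment)
    return linear_hsic(X, Y) / np.sqrt(linear_hsic(X, X) * linear_hsic(Y, Y))

rng = np.random.RandomState(123)
X = rng.randn(100, 3)
Y = X @ rng.randn(3, 2) + 0.1 * rng.randn(100, 2)
Q, _ = np.linalg.qr(rng.randn(3, 3))  # a random orthogonal matrix

print(linear_hsic(10 * X, Y) / linear_hsic(X, Y))           # 100.0 -> HSIC is not scale-invariant
print(np.isclose(linear_cka(10 * X, Y), linear_cka(X, Y)))  # True  -> cKA is scale-invariant
print(np.isclose(linear_cka(X @ Q, Y), linear_cka(X, Y)))   # True  -> cKA is rotation-invariant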
Code¶
import sys, os
import warnings
import tqdm
import random
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
# insert path to the model directory
cwd = os.getcwd()
path = f"{cwd}/../../src"
sys.path.insert(0, path)
# toy datasets
from data.toy import generate_dependence_data
# Kernel Dependency measure
from models.dependence import HSIC, train_rbf_hsic
from models.kernel import estimate_sigma, sigma_to_gamma, gamma_to_sigma, get_param_grid
# RBIG IT measures
from models.ite_algorithms import run_rbig_models
import seaborn as sns
plt.style.use('ggplot')
%matplotlib inline
warnings.filterwarnings('ignore') # get rid of annoying warnings
%load_ext autoreload
%autoreload 2
Example¶
Data¶
def get_data(dataset='sine', num_points=1000, seed=123, noise_x=0.1, noise_y=0.1):
# get dataset
X, Y = generate_dependence_data(
dataset=dataset,
num_points=num_points,
seed=seed,
noise_x=noise_x,
noise_y=noise_y
)
return X, Y
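For example, the helper can be used as follows (the next cell repeats the same steps inline for the demo plot):
# example usage of the helper defined above
X, Y = get_data(dataset='sine', num_points=1000, seed=123)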
# data params
dataset = 'sine'
num_points = 1000
seed = 123
noise_x = 0.1
noise_y = 0.1
alpha = 10
beta = 10
# get dataset
X, Y = generate_dependence_data(
dataset=dataset,
num_points=num_points,
seed=seed,
noise_x=noise_x,
noise_y=noise_y
)
# plot the scaled data
fig, ax = plt.subplots(nrows=1, figsize=(7, 5))
ax.scatter(alpha * X, beta * Y, label=rf'$\alpha$={alpha}, $\beta$={beta}')
plt.legend(fontsize=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.tight_layout()
plt.show()
# make sure the output directory exists before saving
save_path = f'{cwd}/../../results/hsic/figures/demo/'
os.makedirs(save_path, exist_ok=True)
fig.savefig(f"{save_path}demo_{dataset}.png")
Noise in the Data¶
- Linear
- Sinusoidal
- Circle
- Random
Experiment¶
We want a measure that is invariant to orthogonal transformations as well as isotropic scaling. HSIC is invariant to orthogonal transformations but not to isotropic scaling. This can be rectified by simply normalizing the measure, which is what the centered kernel alignment (cKA) does:
$$\text{cKA}(X, Y) = \frac{\text{HSIC}(X, Y)}{\sqrt{\text{HSIC}(X, X)\,\text{HSIC}(Y, Y)}}$$
This notebook gives a short demonstration of how the HSIC value changes under isotropic scaling whereas cKA does not.
Fixed Experimental Parameters
- Dataset - Sinusoidal curve > This is sufficiently nonlinear with some dependencies within the data; complex, but simple to analyze.
- Kernel - RBF > A universal kernel that is quite simple and can potentially model any function.
- Gamma - Silverman estimate from the data > We found in the previous notebook that the Silverman rule works as a decent bandwidth estimator for the data, so we reuse it in this experiment as well.
- n_samples, d_dimensions
Free Experimental Parameters
- HSIC method (HSIC, KA, cKA)
- Scale applied to the data
Helper Functions¶
def get_hsic(X, Y, scorer='hsic', kernel='rbf', gamma=1.0):
# hsic params
subsample = None
bias = True
# initialize HSIC calculator
clf_hsic = HSIC(
gamma=gamma,
kernel=kernel,
scorer=scorer,
subsample=subsample,
bias=bias
)
    # fit the HSIC estimator to the data
    clf_hsic.fit(X, Y)
    # return the score (hsic, tka or ctka, depending on the scorer)
    return clf_hsic.hsic_value
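The bandwidth machinery used below comes from the project's estimate_sigma and sigma_to_gamma (imported above). As a rough sketch of what such helpers typically do, assuming the median heuristic and the RBF parameterization $k(x, y) = \exp(-\gamma \|x - y\|^2)$ with $\gamma = 1/(2\sigma^2)$ (an assumption about, not a copy of, the project's implementation):
from scipy.spatial.distance import pdist

# hypothetical stand-ins for estimate_sigma / sigma_to_gamma
def median_sigma(X):
    # median heuristic: median of the pairwise euclidean distances
    X = np.asarray(X).reshape(len(X), -1)
    return np.median(pdist(X))

def to_gamma(sigma):
    # assumes k(x, y) = exp(-gamma * ||x - y||^2), i.e. gamma = 1 / (2 sigma^2)
    return 1.0 / (2 * sigma ** 2)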
Experiment I - Linear Kernel¶
# Fixed Params (Data)
dataset = 'line'
num_points = 1000
seed = 123
noise_x = 0.1
noise_y = 0.1
beta = 1.0
# Fixed Params (Algorithms)
sigma_est = 'median'
kernel = 'linear'
# Free Params
alphas = np.logspace(-2, 2, 10)
scorers = ['hsic', 'tka', 'ctka']
# collect results as a list of records (pandas' DataFrame.append is deprecated)
results = []
for iscorer in scorers:
for ialpha in alphas:
# generate data
X, Y = generate_dependence_data(
dataset=dataset,
num_points=num_points,
seed=seed,
noise_x=noise_x,
noise_y=noise_y,
)
        # scale X by alpha; beta is fixed to 1
        X *= ialpha
        Y *= beta
# estimate sigma values
init_sigma_X = estimate_sigma(X, method=sigma_est)
init_sigma_Y = estimate_sigma(Y, method=sigma_est)
init_sigma = np.mean([init_sigma_X, init_sigma_Y])
init_gamma = sigma_to_gamma(init_sigma)
# Calculate HSIC value
hsic_val = get_hsic(X, Y, scorer=iscorer, kernel=kernel, gamma=init_gamma)
        # save results
        results.append({
            'scorer': iscorer,
            'hsic': hsic_val,
            'scale': ialpha,
            'gamma': init_gamma,
        })
results_df = pd.DataFrame(results)
results_df.head()
Results¶
def plot_results(results_df, scorer):
# subset dataframe based on scorer
results_df = results_df[results_df['scorer'] == scorer]
# plot results
fig, ax = plt.subplots(ncols=2, figsize=(10, 5))
sns.lineplot(x="scale", y="hsic", data=results_df, ax=ax[0])
sns.lineplot(x="scale", y="gamma", data=results_df, ax=ax[1])
ax[0].set_ylabel(scorer.upper())
ax[1].set_yscale('log')
plt.show()
plot_results(results_df, 'hsic')
plot_results(results_df, 'tka')
plot_results(results_df, 'ctka')
Experiment II - RBF Kernel¶
# Fixed Params (Data)
dataset = 'circ'
num_points = 1000
seed = 123
noise_x = 0.1
noise_y = 0.1
beta = 1.0
# Fixed Params (Algorithms)
sigma_est = 'belkin'
kernel = 'rbf'
# Free Params
alphas = np.logspace(-2, 2, 10)
scorers = ['hsic', 'tka', 'ctka']
# collect results as a list of records
results = []
for iscorer in scorers:
for ialpha in alphas:
# generate data
X, Y = generate_dependence_data(
dataset=dataset,
num_points=num_points,
seed=seed,
noise_x=noise_x,
noise_y=noise_y,
)
        # scale both X and Y by the same factor
        X *= ialpha
        Y *= ialpha
# estimate sigma values
init_sigma_X = estimate_sigma(X, method=sigma_est)
init_sigma_Y = estimate_sigma(Y, method=sigma_est)
init_sigma = np.mean([init_sigma_X, init_sigma_Y])
init_gamma = sigma_to_gamma(init_sigma)
# Calculate HSIC value
hsic_val = get_hsic(X, Y, scorer=iscorer, kernel=kernel, gamma=init_gamma)
        # save results
        results.append({
            'scorer': iscorer,
            'hsic': hsic_val,
            'scale': ialpha,
            'gamma': init_gamma,
        })
results_df = pd.DataFrame(results)
plot_results(results_df, 'hsic')
plot_results(results_df, 'tka')
plot_results(results_df, 'ctka')
Experiment III - Optimized RBF Gamma¶
# Fixed Params (Data)
dataset = 'circ'
num_points = 1000
seed = 123
noise_x = 0.1
noise_y = 0.1
beta = 1.0
# Fixed Params (Algorithms)
sigma_est = 'belkin'
kernel = 'rbf'
n_gamma = 50
factor = 2
n_jobs = -1
cv = 2
# Free Params
alphas = np.logspace(-2, 2, 10)
scorers = ['hsic', 'tka', 'ctka']
# collect results as a list of records
results = []
for iscorer in scorers:
for ialpha in alphas:
# generate data
X, Y = generate_dependence_data(
dataset=dataset,
num_points=num_points,
seed=seed,
noise_x=noise_x,
noise_y=noise_y,
)
        # scale both X and Y by the same factor
        X *= ialpha
        Y *= ialpha
# estimate sigma values
init_sigma_X = estimate_sigma(X, method=sigma_est)
init_sigma_Y = estimate_sigma(Y, method=sigma_est)
init_sigma = np.mean([init_sigma_X, init_sigma_Y])
init_gamma = sigma_to_gamma(init_sigma)
# Calculate HSIC value
clf_hsic = train_rbf_hsic(
X, Y,
scorer=iscorer,
n_gamma=n_gamma,
factor=factor,
sigma_est=sigma_est,
)
hsic_val = clf_hsic.score(X)
        # save results (note: gamma here is the initial estimate, not the optimized one)
        results.append({
            'scorer': iscorer,
            'hsic': hsic_val,
            'scale': ialpha,
            'gamma': init_gamma,
        })
results_df = pd.DataFrame(results)
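For reference, the gamma grid that train_rbf_hsic searches over (controlled by n_gamma and factor) might look roughly like the sketch below; this is a guess at the shape of the project's get_param_grid, not its actual implementation:
# hypothetical sketch of a log-spaced gamma search grid
def make_gamma_grid(init_gamma, n_gamma=50, factor=2):
    # span `factor` orders of magnitude on each side of the initial estimate
    return np.logspace(
        np.log10(init_gamma) - factor,
        np.log10(init_gamma) + factor,
        n_gamma,
    )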
Results¶
plot_results(results_df, 'hsic')
plot_results(results_df, 'tka')
plot_results(results_df, 'ctka')