Skip to content

Checking the Isotropic scaling

In this notebook, we give a short demonstration of how the HSIC measure is affected by isotropic scaling of the data.

As outlined in this paper, in a nutshell, we would prefer similarity measures that have the following properties:

Invariance to Invertible Linear Transformations

s(X,Y) = s(XA, YB)

for any full rank A and B.

Invariance to Orthogonal Transformation

s(X,Y) = s(XU,YV)

where U^\top U=I and V^\top V=I.

Invariance to Isotropic Scaling

s(X, Y)=s(\alpha X, \beta Y)

where \alpha,\beta \in \mathbb{R}^+.

Code

import sys, os
import warnings
import tqdm
import random
import pandas as pd
import numpy as np

import matplotlib
import matplotlib.pyplot as plt

# Insert path to model directory.
cwd = os.getcwd()
path = f"{cwd}/../../src"
sys.path.insert(0, path)

# toy datasets
from data.toy import generate_dependence_data

# Kernel Dependency measure
from models.dependence import HSIC, train_rbf_hsic
from models.kernel import estimate_sigma, sigma_to_gamma, gamma_to_sigma, get_param_grid

# RBIG IT measures
from models.ite_algorithms import run_rbig_models

import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('ggplot')
%matplotlib inline

warnings.filterwarnings('ignore') # get rid of annoying warnings

%load_ext autoreload
%autoreload 2

Example

Data

def get_data(dataset='sine', num_points=1000, seed=123, noise_x=0.1, noise_y=0.1):
    """Generate a toy dependence dataset.

    Thin wrapper around ``generate_dependence_data`` that forwards every
    argument unchanged. Earlier revisions overwrote all arguments with
    hard-coded values inside the body, so callers could never request
    anything but the defaults; the defaults themselves are unchanged.

    Parameters
    ----------
    dataset : str, default='sine'
        Forwarded as ``dataset``.
    num_points : int, default=1000
        Forwarded as ``num_points``.
    seed : int, default=123
        Forwarded as ``seed``.
    noise_x : float, default=0.1
        Forwarded as ``noise_x``.
    noise_y : float, default=0.1
        Forwarded as ``noise_y``.

    Returns
    -------
    tuple
        The ``(X, Y)`` pair produced by ``generate_dependence_data``.
    """
    X, Y = generate_dependence_data(
        dataset=dataset,
        num_points=num_points,
        seed=seed,
        noise_x=noise_x,
        noise_y=noise_y,
    )
    return X, Y
# data params
dataset = 'sine'
num_points = 1000
seed = 123
noise_x = 0.1
noise_y = 0.1
alpha   = 10
beta    = 10  # defined for symmetry with alpha; not used in this cell

# get dataset
X, Y = generate_dependence_data(
    dataset=dataset,
    num_points=num_points,
    seed=seed,
    noise_x=noise_x,
    noise_y=noise_y
)

# plot: only X is scaled (by alpha); Y is shown as generated

fig, ax = plt.subplots(nrows=1, figsize=(7, 5))

# Give the scatter a label so that plt.legend() has a handle to display;
# without a labelled artist matplotlib warns and draws an empty legend.
ax.scatter(alpha*X, Y, label=f"{dataset} (X scaled by {alpha})")
plt.legend(fontsize=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.tight_layout()

# Save before showing, and make sure the target directory exists so that
# savefig does not fail on a fresh checkout.
save_path = f'{cwd}/../../results/hsic/figures/demo/'
os.makedirs(save_path, exist_ok=True)
fig.savefig(f"{save_path}demo_{dataset}.png")

plt.show()
No handles with labels found to put in legend.

Noise in the Data

Linear

Sinusoidal

Circle

Random

Experiment

We want a method that is invariant to orthogonal transformations as well as to isotropic scaling. The HSIC method is invariant to orthogonal transformations but not to isotropic scaling. This can be rectified by simply normalizing the measure, as is done in the centered kernel alignment measure (cKA). This notebook gives a short demonstration of how the HSIC measure changes under isotropic scaling whereas the cKA does not.

Fixed Experimental Parameters

  • Dataset - Sinusoidal Curve > This is sufficiently nonlinear with some dependencies within the data. Complex but simple to analyze.
  • Kernel - RBF > A universal kernel that's quite simple and can potentially model any function.
  • Gamma - Silverman estimate from the data > We found in the previous notebook that the Silverman estimator works pretty well for the data, so we will reuse it in this experiment as well.
  • n_samples, d_dimensions

Free Experimental Parameters

  • HSIC method (HSIC, KA, cKA)
  • Scale applied to the data

Helper Functions

def get_hsic(X, Y, scorer='hsic', kernel='rbf', gamma=1.0):
    """Fit an ``HSIC`` estimator on ``(X, Y)`` and return its ``hsic_value``.

    Parameters
    ----------
    X, Y
        Data handed straight to ``HSIC.fit``.
    scorer : str, default='hsic'
        Forwarded to ``HSIC`` as ``scorer``.
    kernel : str, default='rbf'
        Forwarded to ``HSIC`` as ``kernel``.
    gamma : float, default=1.0
        Forwarded to ``HSIC`` as ``gamma``.

    Returns
    -------
    The ``hsic_value`` attribute of the fitted estimator.
    """
    # Settings for the estimator: the caller's choices plus two fixed
    # options (no subsampling, ``bias`` flag switched on).
    settings = dict(
        gamma=gamma,
        kernel=kernel,
        scorer=scorer,
        subsample=None,
        bias=True,
    )

    estimator = HSIC(**settings)
    estimator.fit(X, Y)

    return estimator.hsic_value

Experiment I - Linear Kernel

# Fixed Params (Data)
dataset = 'line'
num_points = 1000
seed = 123
noise_x = 0.1
noise_y = 0.1
beta = 1.0

# Fixed Params (Algorithms)
sigma_est = 'median'
kernel = 'linear'

# Free Params
alphas = np.logspace(-2, 2, 10)

scorers = ['hsic', 'tka', 'ctka']

# Collect one record per (scorer, scale) pair and build the DataFrame once
# at the end: DataFrame.append was removed in pandas 2.0 and copied the
# whole frame on every call.
records = []
for iscorer in scorers:
    for ialpha in alphas:

        # generate data (regenerated each time because it is scaled in place)
        X, Y = generate_dependence_data(
            dataset=dataset,
            num_points=num_points,
            seed=seed,
            noise_x=noise_x,
            noise_y=noise_y,
        )

        # Scale X by the swept factor alpha and Y by the fixed factor beta
        X *= ialpha
        Y *= beta

        # estimate sigma values
        init_sigma_X = estimate_sigma(X, method=sigma_est)
        init_sigma_Y = estimate_sigma(Y, method=sigma_est)

        # a single bandwidth: the mean of the two per-variable estimates
        init_sigma = np.mean([init_sigma_X, init_sigma_Y])

        init_gamma = sigma_to_gamma(init_sigma)

        # Calculate HSIC value
        hsic_val = get_hsic(X, Y, scorer=iscorer, kernel=kernel, gamma=init_gamma)

        # save results for this (scorer, scale) pair
        records.append({
            'scorer': iscorer,
            'hsic': hsic_val,
            'scale': ialpha,
            'gamma': init_gamma,
        })

results_df = pd.DataFrame(records, columns=[
    'scorer',
    'hsic',
    'scale',
    'gamma'
])
results_df.head()
scorer hsic scale gamma
0 hsic 6.436471e-07 0.010000 21.881260
1 hsic 4.983526e-06 0.027826 21.154142
2 hsic 3.858563e-05 0.077426 19.311447
3 hsic 2.987545e-04 0.215443 15.286885
4 hsic 2.313148e-03 0.599484 8.950672

Results

def plot_results(results_df, scorer):
    """Plot score-vs-scale and gamma-vs-scale for a single scorer.

    Parameters
    ----------
    results_df : pandas.DataFrame
        Frame with the columns ``scorer``, ``hsic``, ``scale`` and ``gamma``.
    scorer : str
        Value of the ``scorer`` column selecting the rows to plot; its
        upper-cased form is used as the y-label of the first panel.
    """
    # keep only the rows that belong to the requested scorer
    subset = results_df[results_df['scorer'] == scorer]

    # first panel: score against scale; second panel: gamma against scale
    fig, (ax_score, ax_gamma) = plt.subplots(ncols=2, figsize=(10, 5))
    sns.lineplot(x="scale", y="hsic", data=subset, ax=ax_score)
    sns.lineplot(x="scale", y="gamma", data=subset, ax=ax_gamma)

    ax_score.set_ylabel(scorer.upper())
    ax_gamma.set_yscale('log')

    plt.show()
# One figure per scorer, in the same order as before.
for plot_scorer in ('hsic', 'tka', 'ctka'):
    plot_results(results_df, plot_scorer)

Experiment II - RBF Kernel

# Fixed Params (Data)
dataset = 'circ'
num_points = 1000
seed = 123
noise_x = 0.1
noise_y = 0.1
beta = 1.0  # not used below: X and Y are both scaled by alpha here

# Fixed Params (Algorithms)
sigma_est = 'belkin'
kernel = 'rbf'

# Free Params
alphas = np.logspace(-2, 2, 10)

scorers = ['hsic', 'tka', 'ctka']

# Collect one record per (scorer, scale) pair and build the DataFrame once
# at the end: DataFrame.append was removed in pandas 2.0 and copied the
# whole frame on every call.
records = []
for iscorer in scorers:
    for ialpha in alphas:

        # generate data (regenerated each time because it is scaled in place)
        X, Y = generate_dependence_data(
            dataset=dataset,
            num_points=num_points,
            seed=seed,
            noise_x=noise_x,
            noise_y=noise_y,
        )

        # Scale both X and Y by the same swept factor alpha
        X *= ialpha
        Y *= ialpha

        # estimate sigma values
        init_sigma_X = estimate_sigma(X, method=sigma_est)
        init_sigma_Y = estimate_sigma(Y, method=sigma_est)

        # a single bandwidth: the mean of the two per-variable estimates
        init_sigma = np.mean([init_sigma_X, init_sigma_Y])

        init_gamma = sigma_to_gamma(init_sigma)

        # Calculate HSIC value
        hsic_val = get_hsic(X, Y, scorer=iscorer, kernel=kernel, gamma=init_gamma)

        # save results for this (scorer, scale) pair
        records.append({
            'scorer': iscorer,
            'hsic': hsic_val,
            'scale': ialpha,
            'gamma': init_gamma,
        })

results_df = pd.DataFrame(records, columns=[
    'scorer',
    'hsic',
    'scale',
    'gamma'
])
plot_results(results_df, 'hsic')
plot_results(results_df, 'tka')
plot_results(results_df, 'ctka')

Experiment III - Optimized RBF Gamma

# Fixed Params (Data)
dataset = 'circ'
num_points = 1000
seed = 123
noise_x = 0.1
noise_y = 0.1
beta = 1.0  # not used below: X and Y are both scaled by alpha here

# Fixed Params (Algorithms)
sigma_est = 'belkin'
kernel = 'rbf'
n_gamma = 50
factor = 2
# NOTE(review): n_jobs and cv are defined but never passed to
# train_rbf_hsic in this cell - confirm whether they should be.
n_jobs = -1
cv = 2


# Free Params
alphas = np.logspace(-2, 2, 10)
scorers = ['hsic', 'tka', 'ctka']

# Collect one record per (scorer, scale) pair and build the DataFrame once
# at the end: DataFrame.append was removed in pandas 2.0 and copied the
# whole frame on every call.
records = []
for iscorer in scorers:
    for ialpha in alphas:

        # generate data (regenerated each time because it is scaled in place)
        X, Y = generate_dependence_data(
            dataset=dataset,
            num_points=num_points,
            seed=seed,
            noise_x=noise_x,
            noise_y=noise_y,
        )

        # Scale both X and Y by the same swept factor alpha
        X *= ialpha
        Y *= ialpha

        # estimate sigma values
        init_sigma_X = estimate_sigma(X, method=sigma_est)
        init_sigma_Y = estimate_sigma(Y, method=sigma_est)
        init_sigma = np.mean([init_sigma_X, init_sigma_Y])

        init_gamma = sigma_to_gamma(init_sigma)

        # Train the HSIC model
        clf_hsic = train_rbf_hsic(
            X, Y,
            scorer=iscorer,
            n_gamma=n_gamma,
            factor=factor,
            sigma_est=sigma_est,
        )

        hsic_val = clf_hsic.score(X)

        # save results for this (scorer, scale) pair
        # NOTE(review): 'gamma' stores the initial estimate computed above,
        # not whatever value train_rbf_hsic selected - confirm this is the
        # quantity that should be plotted.
        records.append({
            'scorer': iscorer,
            'hsic': hsic_val,
            'scale': ialpha,
            'gamma': init_gamma,
        })

results_df = pd.DataFrame(records, columns=[
    'scorer',
    'hsic',
    'scale',
    'gamma'
])

Results

# One figure per scorer, in the same order as before.
for plot_scorer in ('hsic', 'tka', 'ctka'):
    plot_results(results_df, plot_scorer)