Walkthrough - Mutual Info vs. Score for Multivariate Distributions¶
This notebook goes through some of the results of the experiment. We will look at the 4 factors (standardization, sigma estimator, sigma format and HSIC estimator) and try to discern the relationship between the score and the mutual information content. It will be a bit tedious to go through things step by step, but hopefully by the end we will recover some sort of relationship.
import sys, os
# Insert path to model directory.
cwd = os.getcwd()
path = f"{cwd}/../../src"
sys.path.insert(0, path)
# Insert path to package.
pysim_path = f"/home/emmanuel/code/pysim/"
sys.path.insert(0, pysim_path)
import pathlib
import warnings
from typing import Optional, Tuple
from tqdm import tqdm
import random
import pandas as pd
import numpy as np
import argparse
from sklearn.utils import check_random_state
# toy datasets
from data.distribution import DataParams, Inputs
# Kernel Dependency measure
from models.dependence import HSICModel
from pysim.kernel.utils import GammaParam, SigmaParam
from sklearn.gaussian_process.kernels import RBF
# RBIG IT measures
# from models.ite_algorithms import run_rbig_models
# Plotting
from visualization.distribution import plot_scorer
# experiment helpers
from tqdm import tqdm
# Plotting Procedures
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use(['seaborn-talk'])
warnings.filterwarnings('ignore') # get rid of annoying warnings
%matplotlib inline
warnings.filterwarnings('ignore') # get rid of annoying warnings
%load_ext autoreload
%autoreload 2
Data¶
# Absolute, machine-specific locations: figures are written under FIG_PATH,
# experiment result CSVs are read from RES_PATH.
FIG_PATH = "/home/emmanuel/projects/2019_hsic_align/results/figures/distribution_experiment/mutual_info/"
RES_PATH = "/home/emmanuel/projects/2019_hsic_align/data/results/distributions/mutual_info/"
# IPython shell escape: list the available result files.
!ls $RES_PATH
Datasets¶
- Samples - [500, 1K, 5K, 10K, 30K, 50K]
- Dimensions - [ 2, 3, 10, 50, 100]
- Trials - 1:5
- IT measures - Mutual Information
- Distributions - [Gaussian, T-Student]
T-student Dataset¶
# Column names of the results table.
# NOTE(review): `header` is not referenced anywhere else in the visible
# notebook; the CSV below is read with its own header row.
header = [
"dataset",
"trial",
"std",
"nu",
"samples",
"dimensions",
"standardize",
"per_dimension",
"separate_scales",
# SIGMA METHOD PARAMS
"sigma_method",
"sigma_percent",
"sigma_X",
"sigma_Y",
# HSIC Params
"scorer",
"score",
"mutual_info",
]
dataset = 'gauss' # other option tstudent
# results_df = pd.read_csv(f"{RES_PATH}old/{dataset}_mi.csv")
# Load the results for the selected dataset only. The concat wrapper lets a
# second file (e.g. the t-student results) be appended by uncommenting it.
# RES_PATH already ends in "/", so the path contains a harmless "//".
results_df = pd.concat([
pd.read_csv(f"{RES_PATH}/v3_{dataset}.csv", index_col=0),
# pd.read_csv(f"{RES_PATH}/tstudent_mi.csv", index_col=0)
], )
# Peek at the last rows.
results_df.tail()
Cleaning¶
So we need to clean this up a little bit.
- We don't need the sigma values (for now)
- We should take the average of the trials to get some estimates
# Work on a copy so `results_df` keeps the raw table.
res_df_ = results_df.copy()
# Drop the dataset-name column. The sigma columns are currently kept
# (their names are commented out of the drop list).
res_df_ = res_df_.drop([
# 'sigma_X', 'sigma_Y',
'dataset'], axis=1)
# =================
# trial selection
# =================
# NOTE(review): the trials are NOT averaged here. Only the rows of trial 1
# are kept (see the filter below); the groupby/mean is commented out.
# Candidate grouping columns for an average over trials.
# NOTE(review): `dependent_vars` is only used by the commented-out line below.
dependent_vars = [
# Dataset params
"std",
"nu",
"samples",
"dimensions",
# STANDARDIZE PARAMS
"standardize",
# SIGMA FORMAT PARAMS
"per_dimension",
"separate_scales",
# SIGMA METHOD PARAMS
"sigma_method",
"sigma_percent",
# HSIC Params
"scorer",
"score",
"mutual_info",
# "trial"
]
# Keep a single trial and discard the now-constant `trial` column.
res_df_ = res_df_[res_df_['trial'] == 1].drop('trial', axis=1)
# res_df_.set_index(dependent_vars).groupby(['trial']).mean()
res_df_.head()
Case I - Different HSIC Scorer¶
def plot_scores(df: pd.DataFrame) -> None:
    """Scatter score against mutual information, one panel per scorer.

    Draws a 1x3 figure with one panel each for the 'hsic', 'cka' and 'ka'
    rows of ``df`` (selected through its ``scorer`` column). Each panel plots
    the ``score`` column on x and the ``mutual_info`` column on y. The figure
    is shown immediately; nothing is returned.
    """
    scorer_names = ('hsic', 'cka', 'ka')
    fig, axes = plt.subplots(ncols=len(scorer_names), figsize=(20, 5))

    for panel, scorer_name in zip(axes, scorer_names):
        rows = df[df['scorer'] == scorer_name]
        panel.scatter(rows.score.values, rows.mutual_info.values, s=10)
        panel.set_title(f'{scorer_name.upper()}')
        panel.set_xlabel('Score')
        panel.set_ylabel('Mutual Info')

    plt.show()
    return None
plot_scores(res_df_)
Case II - Sigma Estimator¶
- Scott, Silverman
- Median Values
Silverman, Scott¶
# Estimators selected by name in `sigma_method`.
constant_methods = ['scott', 'silverman']
# Keep only the rows produced by those estimators and plot them.
uses_constant_method = res_df_['sigma_method'].isin(constant_methods)
sub_df_ = res_df_.loc[uses_constant_method]
plot_scores(sub_df_)
Medians¶
# All percentile values that were actually used.
# `unique()` alone also returns NaN when some rows have no percentile, and
# pandas `isin` matches NaN to NaN, so filtering on the raw unique values
# would select every row. Dropping NaN first restricts the subset to rows
# that carry a sigma percentile.
# NOTE(review): assumes rows without a percentile (presumably the
# scott/silverman runs) store NaN in `sigma_percent`; confirm against the CSV.
percent_methods = res_df_['sigma_percent'].dropna().unique().tolist()
# subset dataset
sub_df_ = res_df_[res_df_['sigma_percent'].isin(percent_methods)]
plot_scores(sub_df_)
Medians - Reasonable¶
# Hand-picked percentiles. 0.30000000000000004 is written out exactly as it
# is compared for equality against the stored float values.
percent_methods = [0.30000000000000004, 0.4, 0.5]
# Keep only the rows using one of those percentiles and plot them.
has_chosen_percent = res_df_['sigma_percent'].isin(percent_methods)
sub_df_ = res_df_.loc[has_chosen_percent]
plot_scores(sub_df_)
Case III - Sigma Configuration¶
- Same Length Scale
- Separate Length Scales
- Length Scale per Dimension
constant_methods = ['scott', 'silverman']
# Rows of the scott/silverman estimators; the starting point for every
# configuration below.
method_rows = res_df_[res_df_['sigma_method'].isin(constant_methods)]

# One figure per (separate_scales, per_dimension) configuration.
sigma_configs = [
    (False, False),
    (True, False),
    (True, True),
]
for separate_flag, per_dim_flag in sigma_configs:
    keep = (
        (method_rows['separate_scales'] == separate_flag)
        & (method_rows['per_dimension'] == per_dim_flag)
    )
    sub_df_ = method_rows[keep]
    plot_scores(sub_df_)
Estimator - Medians¶
# All percentile values that were actually used. NaN is dropped first:
# pandas `isin` matches NaN to NaN, so the raw `unique()` list would make the
# filter below select every row instead of only rows with a percentile.
# NOTE(review): assumes rows without a percentile store NaN in
# `sigma_percent`; confirm against the CSV.
percent_methods = res_df_['sigma_percent'].dropna().unique().tolist()
median_rows = res_df_[res_df_['sigma_percent'].isin(percent_methods)]

# (separate_scales, per_dimension, optional label printed before the figure)
sigma_configs = [
    (False, False, None),
    (True, False, None),
    (False, True, "Per Dimension, Same Data"),
    (True, True, None),
]
for separate_flag, per_dim_flag, label in sigma_configs:
    if label is not None:
        print(label)
    keep = (
        (median_rows['separate_scales'] == separate_flag)
        & (median_rows['per_dimension'] == per_dim_flag)
    )
    sub_df_ = median_rows[keep]
    plot_scores(sub_df_)
Sigma Estimator - Medians, Reasonable¶
# Hand-picked percentiles, compared for exact float equality.
percent_methods = [0.30000000000000004, 0.4, 0.5]
percent_rows = res_df_[res_df_['sigma_percent'].isin(percent_methods)]

# One figure per (separate_scales, per_dimension) configuration.
sigma_configs = [
    (False, False),
    (True, False),
    (True, True),
]
for separate_flag, per_dim_flag in sigma_configs:
    keep = (
        (percent_rows['separate_scales'] == separate_flag)
        & (percent_rows['per_dimension'] == per_dim_flag)
    )
    sub_df_ = percent_rows[keep]
    plot_scores(sub_df_)
Case IV - Standardize or Not¶
Estimator: Scott/Silverman | NOT Standardized¶
constant_methods = ['scott', 'silverman']
# Scott/Silverman rows computed on non-standardized data.
method_rows = res_df_[res_df_['sigma_method'].isin(constant_methods)]

# One figure per (separate_scales, per_dimension) configuration.
sigma_configs = [
    (False, False),
    (True, False),
    (True, True),
]
for separate_flag, per_dim_flag in sigma_configs:
    keep = (
        (method_rows['separate_scales'] == separate_flag)
        & (method_rows['per_dimension'] == per_dim_flag)
        & (method_rows['standardize'] == False)
    )
    sub_df_ = method_rows[keep]
    plot_scores(sub_df_)
Scott / Silverman | Standardized¶
constant_methods = ['scott', 'silverman']
# Scott/Silverman rows computed on standardized data.
method_rows = res_df_[res_df_['sigma_method'].isin(constant_methods)]

# (label, separate_scales, per_dimension); the label is printed above its figure.
sigma_configs = [
    ('Same Scale, Same Dim', False, False),
    ('Per Scale, Same Dim', True, False),
    ('Per Scale, Per Dim', True, True),
]
for label, separate_flag, per_dim_flag in sigma_configs:
    print(label)
    keep = (
        (method_rows['separate_scales'] == separate_flag)
        & (method_rows['per_dimension'] == per_dim_flag)
        & (method_rows['standardize'] == True)
    )
    sub_df_ = method_rows[keep]
    plot_scores(sub_df_)
Medians | Not Standardized¶
# All percentile values that were actually used. NaN is dropped first:
# pandas `isin` matches NaN to NaN, so the raw `unique()` list would make the
# filter below select every row instead of only rows with a percentile.
# NOTE(review): assumes rows without a percentile store NaN in
# `sigma_percent`; confirm against the CSV.
percent_methods = res_df_['sigma_percent'].dropna().unique().tolist()
median_rows = res_df_[res_df_['sigma_percent'].isin(percent_methods)]

# (label, separate_scales, per_dimension); the label is printed above its figure.
sigma_configs = [
    ('Same Scale, Same Dim', False, False),
    ('Per Scale, Same Dim', True, False),
    ('Per Scale, Per Dim', True, True),
]
for label, separate_flag, per_dim_flag in sigma_configs:
    print(label)
    keep = (
        (median_rows['separate_scales'] == separate_flag)
        & (median_rows['per_dimension'] == per_dim_flag)
        & (median_rows['standardize'] == False)
    )
    sub_df_ = median_rows[keep]
    plot_scores(sub_df_)
Medians | STANDARDIZED¶
# All percentile values that were actually used. NaN is dropped first:
# pandas `isin` matches NaN to NaN, so the raw `unique()` list would make the
# filter below select every row instead of only rows with a percentile.
# NOTE(review): assumes rows without a percentile store NaN in
# `sigma_percent`; confirm against the CSV.
percent_methods = res_df_['sigma_percent'].dropna().unique().tolist()
median_rows = res_df_[res_df_['sigma_percent'].isin(percent_methods)]

# (label, separate_scales, per_dimension); the label is printed above its figure.
sigma_configs = [
    ('Same Scale, Same Dim, Standardized', False, False),
    ('Per Scale, Same Dim, Standardized', True, False),
    ('Per Scale, Per Dim, Standardized', True, True),
]
for label, separate_flag, per_dim_flag in sigma_configs:
    print(label)
    keep = (
        (median_rows['separate_scales'] == separate_flag)
        & (median_rows['per_dimension'] == per_dim_flag)
        & (median_rows['standardize'] == True)
    )
    sub_df_ = median_rows[keep]
    plot_scores(sub_df_)
Medians (Reasonable) | NOT STANDARDIZED¶
# Hand-picked percentiles, compared for exact float equality.
percent_methods = [0.30000000000000004, 0.4, 0.5]
percent_rows = res_df_[res_df_['sigma_percent'].isin(percent_methods)]

# (label, separate_scales, per_dimension); the label is printed above its figure.
sigma_configs = [
    ('Same Scale, Same Dim', False, False),
    ('Per Scale, Same Dim', True, False),
    ('Per Scale, Per Dim', True, True),
]
for label, separate_flag, per_dim_flag in sigma_configs:
    print(label)
    keep = (
        (percent_rows['separate_scales'] == separate_flag)
        & (percent_rows['per_dimension'] == per_dim_flag)
        & (percent_rows['standardize'] == False)
    )
    sub_df_ = percent_rows[keep]
    plot_scores(sub_df_)
Medians (Reasonable) | STANDARDIZED¶
Objective: We want 1 method to rule them all!!
# A single percentile, compared for exact float equality.
percent_methods = [0.30000000000000004]
print('Same Scale, Same Dim, STANDARDIZED')
# Shared length scale, not per dimension, standardized inputs.
keep = (
    res_df_['sigma_percent'].isin(percent_methods)
    & (res_df_['separate_scales'] == False)
    & (res_df_['per_dimension'] == False)
    & (res_df_['standardize'] == True)
)
sub_df_ = res_df_[keep]
# Optional extra restrictions, currently disabled:
# sub_df_ = sub_df_[sub_df_['dimensions'] == 50]
# sub_df_ = sub_df_[sub_df_['samples'] == 50]
sub_df_.head()
plot_scores(sub_df_)
# The other two configurations are currently disabled:
#   'Per Scale, Same Dim, STANDARDIZED' -> separate_scales == True, per_dimension == False
#   'Per Scale, Per Dim, STANDARDIZED'  -> separate_scales == True, per_dimension == True
Viz I - Difference in Method¶
For the first visualization, we're just going to get a general overview of how each method performs
from typing import List, Callable
def plot_individual_scores(
    scores_df: pd.DataFrame,
    gamma_estimators: List,
    scorer: str,
    dataset: str='gauss',
    mi_scale: Optional[Callable[[np.ndarray], np.ndarray]]=None
):
    """Scatter score vs. mutual information, one series per gamma estimator.

    Parameters
    ----------
    scores_df : pd.DataFrame
        Results table. The columns read here are ``dataset``, ``scorer``,
        ``score``, ``mutual_info``, ``gamma_method`` and, when the estimator
        requests them, ``gamma_percent`` / ``gamma_scale``.
        NOTE(review): the results loaded earlier in this notebook use
        ``sigma_*`` column names; confirm the table passed in really has the
        ``gamma_*`` columns.
    gamma_estimators : List
        Sequence of ``(method, percent, scale)`` entries. ``percent`` and
        ``scale`` are only used as filters when they are not ``None``.
    scorer : str
        Value of the ``scorer`` column to keep.
    dataset : str
        Value of the ``dataset`` column to keep.
    mi_scale : callable, optional
        Applied to the ``mutual_info`` column before plotting (e.g. ``np.log2``).

    Returns
    -------
    (fig, ax)
        The matplotlib figure and axes holding the scatter plot.
    """
    # initialize plot
    fig, ax = plt.subplots()

    # Subset by dataset and scorer. The explicit copy keeps the rescaling
    # below from writing into a slice of the caller's frame (which raises
    # pandas' SettingWithCopyWarning and may or may not take effect).
    selected = (scores_df['dataset'] == dataset) & (scores_df['scorer'] == scorer)
    df_ = scores_df[selected].copy()

    if mi_scale is not None:
        df_['mutual_info'] = mi_scale(df_['mutual_info'])

    for iestimator in gamma_estimators:
        method, percent, scale = iestimator[0], iestimator[1], iestimator[2]

        sub_df = df_[df_['gamma_method'] == method]
        if percent is not None:
            sub_df = sub_df[sub_df['gamma_percent'] == percent]
        if scale is not None:
            sub_df = sub_df[sub_df['gamma_scale'] == scale]

        # Build the legend label from the parts that are set. Only None is
        # skipped, so a legitimate 0 / 0.0 still shows up in the label.
        name = '_'.join(str(part) for part in iestimator if part is not None)

        ax.scatter(
            sub_df.score,
            sub_df.mutual_info,
            s=50, label=f"{name}", zorder=3, marker='.')

    return fig, ax
Viz - Scott, Silverman¶
This should be the worst one for each of them because this method isn't taking into account the dimensions or the samples in a very smart way. It's fine for 1D examples, but we know that this isn't very good for data with a large number of samples or large number of dimensions.
# Estimators to overlay: (method, percent, scale).
demo_params = [
    ('silverman', None, None),
    ('scott', None, None),
    # *[('median', x, None) for x in np.arange(0.1, 1.0, 0.1, dtype=np.float64)]
]

# One figure per (scorer, dataset) pair. The first pair is hsic/gauss: the
# original cells plotted cka/gauss twice and never produced hsic/gauss.
# NOTE(review): `results_df` as loaded earlier in this notebook holds a single
# dataset, so figures for the other dataset are presumably empty; confirm.
scorer_dataset_pairs = [
    ('hsic', 'gauss'),
    ('hsic', 'tstudent'),
    ('cka', 'gauss'),
    ('cka', 'tstudent'),
    ('ka', 'gauss'),
    ('ka', 'tstudent'),
]
for scorer, dataset in scorer_dataset_pairs:
    fig, ax = plot_individual_scores(
        results_df, demo_params, scorer, dataset, mi_scale=np.log2
    )
    ax.set_xlabel("Score")
    ax.set_ylabel("Mutual Information")
    ax.set_title(f"Method: {scorer}, Dataset: {dataset}")
    ax.legend()
    plt.tight_layout()
    fig.savefig(f"{FIG_PATH}{scorer}_{dataset}_standard.png")
Viz - Median Distances (All)¶
So for this, I will be looking at a few median distance values. This is the standard method so it will be good to compare.
# Display the full list of median estimators (percentiles 0.1 ... 0.9).
[('median', x, None) for x in np.arange(0.1, 1.0, 0.1, dtype=np.float64)]
# NOTE(review): this hand-picked list is dead; it is overwritten by the
# assignment right after it.
demo_params = [
('median', 0.30000000000000004, None),
('median', 0.5, None),
('median', 0.7000000000000001, None),
]
# Estimators actually used below: every median percentile from 0.1 to 0.9.
demo_params = [('median', x, None) for x in np.arange(0.1, 1.0, 0.1, dtype=np.float64)]
projects/2019_hsic_align/results/figures/distribution_experiment/mutual_info
# One figure per (scorer, dataset) pair, using the `demo_params` defined above.
# The legend is intentionally left off.
scorer_dataset_pairs = [
    ('hsic', 'gauss'),
    ('hsic', 'tstudent'),
    ('cka', 'gauss'),
    ('cka', 'tstudent'),
    ('ka', 'gauss'),
    ('ka', 'tstudent'),
]
for scorer, dataset in scorer_dataset_pairs:
    fig, ax = plot_individual_scores(
        results_df, demo_params, scorer, dataset, mi_scale=np.log2
    )
    ax.set_xlabel("Score")
    ax.set_ylabel("Mutual Information")
    ax.set_title(f"Method: {scorer}, Dataset: {dataset}")
    plt.tight_layout()
    fig.savefig(f"{FIG_PATH}{scorer}_{dataset}_median_all.png")
#### Reasonable
# Hand-picked "reasonable" median percentiles: (method, percent, scale).
demo_params = [
    ('median', 0.30000000000000004, None),
    ('median', 0.5, None),
    ('median', 0.7000000000000001, None),
]

# One figure per (scorer, dataset) pair. The legend is intentionally left off.
# Files are saved with a `_median_reasonable` suffix: the original cells
# reused `_median_all`, which overwrote the figures of the all-percentile
# section with these three-percentile plots.
scorer_dataset_pairs = [
    ('hsic', 'gauss'),
    ('hsic', 'tstudent'),
    ('cka', 'gauss'),
    ('cka', 'tstudent'),
    ('ka', 'gauss'),
    ('ka', 'tstudent'),
]
for scorer, dataset in scorer_dataset_pairs:
    fig, ax = plot_individual_scores(
        results_df, demo_params, scorer, dataset, mi_scale=np.log2
    )
    ax.set_xlabel("Score")
    ax.set_ylabel("Mutual Information")
    ax.set_title(f"Method: {scorer}, Dataset: {dataset}")
    plt.tight_layout()
    fig.savefig(f"{FIG_PATH}{scorer}_{dataset}_median_reasonable.png")