Skip to content

Distribution Experiment

import sys, os
import warnings
import tqdm
import random
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

# Put the project's `src` directory (two levels above the current working
# directory) at the front of the import path, ahead of the project-local
# imports that follow (data, experiments, models).
# NOTE(review): this relies on the notebook being launched from its own
# directory, since the path is built from os.getcwd() — confirm.
cwd = os.getcwd()
path = f"{cwd}/../../src"
sys.path.insert(0, path)

# toy datasets
from data.toy import RBIGData

# Experiments
from experiments.distributions import DistributionExp

# Kernel Dependency measure
from models.dependence import HSIC, train_rbf_hsic
from models.kernel import estimate_sigma, sigma_to_gamma, gamma_to_sigma, get_param_grid

# RBIG IT measures
from models.ite_algorithms import run_rbig_models

import scipy.io as scio

import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

warnings.filterwarnings('ignore') # get rid of annoying warnings

%load_ext autoreload
%autoreload 2
path
'/home/emmanuel/projects/2019_hsic_align/notebooks/4_distributions/../../src'

Datasets

  • Samples - [500, 1K, 5K, 10K, 30K, 50K]
  • Dimensions - [ 2, 3, 10, 50, 100]
  • Trials - [1, 5]
  • IT measures - [TC, H, MI, KLD]
  • Distributions - [Linear, Gaussian, T-Student]

Example - Gaussian Distribution

# Root directory of the reference data and the Gaussian MI sub-directory in it.
data_path = "/media/disk/erc/papers/2018_RBIG_IT_measures/2018_RBIG_IT_measures/reproducible_results/DATA/"
gauss_data = data_path + "MI_gaus/"

# One example file ("tryal" is the spelling used in the file name).
sample_data = "DATA_MI_gaus_nd_3_Ns_500_tryal_1.mat"

# Read the .mat file and pull out the three entries of interest.
dat = scio.loadmat(gauss_data + sample_data)
X = dat['X']
Y = dat['Y']
mi_val = dat['MI_ori_nats']

Using the Helper function

from typing import Optional

class MIData:
    """Loader for the stored mutual-information (MI) benchmark ``.mat`` files.

    Every file is read with ``scipy.io.loadmat`` and is expected to contain
    the keys ``X``, ``Y`` and ``MI_ori_nats``.

    Dataset
    -------
    trials = 1:5
    samples = 50, 100, 500, 1_000, 5_000
    dimensions = 2, 3, 10, 50, 100
    std = 1:11
    nu = 1:9

    Parameters
    ----------
    distribution : str, default='gauss'
        Which family of files to read: ``'gauss'`` or ``'tstudent'``.
    data_path : str, optional
        Root directory containing the ``MI_gaus/`` and ``MI_tstu/``
        sub-directories. Defaults to ``DEFAULT_DATA_PATH``.

    Raises
    ------
    ValueError
        If ``distribution`` is not one of the supported names.
    """

    # Root directory used when no ``data_path`` is given.
    DEFAULT_DATA_PATH = "/media/disk/erc/papers/2019_HSIC_ALIGN/data/mi_distributions/"

    def __init__(
        self,
        distribution: str = 'gauss',
        data_path: Optional[str] = None,
    ) -> None:

        if data_path is None:
            data_path = self.DEFAULT_DATA_PATH
        elif not data_path.endswith("/"):
            # Paths are joined by plain concatenation, so keep a trailing slash.
            data_path = f"{data_path}/"

        self.distribution = distribution
        self.data_path = data_path

        if self.distribution == 'gauss':
            self.dist_path = f"{self.data_path}MI_gaus/"
        elif self.distribution == 'tstudent':
            self.dist_path = f"{self.data_path}MI_tstu/"
        else:
            raise ValueError(f"Unrecognized Dataset: {distribution}")

    def get_data(self, samples=50, dimensions=2, std=1, trial=1, nu=1):
        """Load one dataset file for the configured distribution.

        Parameters
        ----------
        samples : int, default=50
            Value substituted for ``Ns`` in the file name.
        dimensions : int, default=2
            Value substituted for ``nd`` in the file name.
        std : int, default=1
            Value substituted for ``std``; only used for ``'gauss'``.
        trial : int, default=1
            Value substituted for ``tryal`` in the file name.
        nu : int, default=1
            Value substituted for ``nu``; only used for ``'tstudent'``.

        Returns
        -------
        tuple
            ``(X, Y, mi)`` where ``X`` and ``Y`` are the arrays stored in the
            file and ``mi`` is the ``MI_ori_nats`` entry converted to ``float``.

        Raises
        ------
        ValueError
            If ``self.distribution`` is not a supported name (possible only
            when the attribute was changed after construction).
        """
        # Only the file name differs between distributions; "tryal" is the
        # spelling used by the files on disk.
        if self.distribution == 'gauss':
            filename = (
                f"DATA_MI_gaus_nd_{dimensions}_"
                f"Ns_{samples}_std_{std}_tryal_{trial}.mat"
            )
        elif self.distribution == 'tstudent':
            filename = (
                f"DATA_MI_tstu_nd_{dimensions}_"
                f"Ns_{samples}_tryal_{trial}_nu_{nu}.mat"
            )
        else:
            raise ValueError(f"Unrecognized distribution '{self.distribution}'")

        dat = scio.loadmat(f"{self.dist_path}{filename}")

        # loadmat returns the scalar as a 2-D array, hence the [0][0] indexing.
        return dat['X'], dat['Y'], float(dat['MI_ori_nats'][0][0])
# Scratch example: walk the (key, value) pairs of a dict and print each pair.
itera = {'1': 'a', '2': 'b'}
for key, value in itera.items():
    print(key, value)
1 a
2 b
# Distribution family to load; passed to the loader below so the two cannot
# drift apart.
dataset = 'tstudent'

# Build the loader for the selected distribution.
mi_loader = MIData(dataset)

# Load one dataset using get_data's default arguments.
x, y, mi = mi_loader.get_data()

SAVE_PATH = "/home/emmanuel/projects/2019_hsic_align/results/hsic/"

# Keyword arguments handed to DistributionExp, gathered in one place.
exp_settings = {
    "seed": 123,
    "factor": 1,
    "sigma_est": 'median',
    "n_gamma": 10,
    "save_path": SAVE_PATH,
    "save_name": 'dist_v2_belkin',
}
clf_exp = DistributionExp(**exp_settings)

# Launch the complete experiment.
clf_exp.run_experiment()
Function: gauss
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-6-6466ca7e5a66> in <module>
     11 
     12 # run full experiment
---> 13 clf_exp.run_experiment()

~/projects/2019_hsic_align/notebooks/4_distributions/../../src/experiments/distributions.py in run_experiment(self)
    223                                         hsic_method=hsic_method,
    224                                         hsic_score=hsic_score,
--> 225                                         mi_score=mi_score,
    226                                     )
    227 

~/projects/2019_hsic_align/notebooks/4_distributions/../../src/experiments/distributions.py in append_results(self, results_df, dataset, trial, n_samples, d_dimensions, std, nu, gamma, gamma_median, gamma_silv, gamma_scott, gamma_belkin, hsic_method, hsic_score, mi_score)
    332                 "mi_score": mi_score,
    333             },
--> 334             ignore_index=True,
    335         )
    336 

~/.conda/envs/it4dnn/lib/python3.6/site-packages/pandas/core/frame.py in append(self, other, ignore_index, verify_integrity, sort)
   7103                 columns=combined_columns,
   7104             )
-> 7105             other = other._convert(datetime=True, timedelta=True)
   7106             if not self.columns.equals(combined_columns):
   7107                 self = self.reindex(columns=combined_columns)

~/.conda/envs/it4dnn/lib/python3.6/site-packages/pandas/core/generic.py in _convert(self, datetime, numeric, timedelta, coerce, copy)
   6044                 timedelta=timedelta,
   6045                 coerce=coerce,
-> 6046                 copy=copy,
   6047             )
   6048         ).__finalize__(self)

~/.conda/envs/it4dnn/lib/python3.6/site-packages/pandas/core/internals/managers.py in convert(self, **kwargs)
    582 
    583     def convert(self, **kwargs):
--> 584         return self.apply("convert", **kwargs)
    585 
    586     def replace(self, value, **kwargs):

~/.conda/envs/it4dnn/lib/python3.6/site-packages/pandas/core/internals/managers.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
    436                     kwargs[k] = obj.reindex(b_items, axis=axis, copy=align_copy)
    437 
--> 438             applied = getattr(b, f)(**kwargs)
    439             result_blocks = _extend_blocks(applied, result_blocks)
    440 

~/.conda/envs/it4dnn/lib/python3.6/site-packages/pandas/core/internals/blocks.py in convert(self, *args, **kwargs)
   2821 
   2822         if by_item and not self._is_single_block:
-> 2823             blocks = self.split_and_operate(None, f, False)
   2824         else:
   2825             values = f(None, self.values.ravel(), None)

~/.conda/envs/it4dnn/lib/python3.6/site-packages/pandas/core/internals/blocks.py in split_and_operate(self, mask, f, inplace)
    491             # need a new block
    492             if m.any():
--> 493                 nv = f(m, v, i)
    494             else:
    495                 nv = v if inplace else v.copy()

~/.conda/envs/it4dnn/lib/python3.6/site-packages/pandas/core/internals/blocks.py in f(m, v, i)
   2812         def f(m, v, i):
   2813             shape = v.shape
-> 2814             values = fn(v.ravel(), **fn_kwargs)
   2815             if isinstance(values, np.ndarray):
   2816                 # TODO: allow EA once reshape is supported

~/.conda/envs/it4dnn/lib/python3.6/site-packages/pandas/core/dtypes/cast.py in soft_convert_objects(values, datetime, numeric, timedelta, coerce, copy)
    844         # bound of nanosecond-resolution 64-bit integers.
    845         try:
--> 846             values = lib.maybe_convert_objects(values, convert_datetime=datetime)
    847         except OutOfBoundsDatetime:
    848             pass

KeyboardInterrupt: