RBIG Demo¶
import sys
sys.path.insert(0, '/home/emmanuel/code/py_packages/rbig/')
sys.path.insert(0, '/home/emmanuel/code/rbig/')
sys.path.insert(0, '/Users/eman/Documents/code_projects/rbig/')
import numpy as np
import warnings
from time import time
from rbig.rbig import RBIG, entropy
# from rbig.model import RBIG
from sklearn.model_selection import train_test_split
from sklearn.utils import check_random_state
import matplotlib.pyplot as plt
plt.style.use('ggplot')
warnings.filterwarnings('ignore') # get rid of annoying warnings
%matplotlib inline
%load_ext autoreload
%autoreload 2
Toy Data¶
seed = 123
rng = np.random.RandomState(seed=seed)
num_samples = 10_000
x = np.abs(2 * rng.randn(1, num_samples))
y = np.sin(x) + 0.25 * rng.randn(1, num_samples)
data = np.vstack((x, y)).T
fig, ax = plt.subplots()
ax.scatter(data[:, 0], data[:, 1], s=1)
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_title('Original Data')
plt.show()
RBIG Fitting¶
%%time
n_layers = 1000
rotation_type = 'pca'
random_state = 123
zero_tolerance = 10
# Initialize RBIG class
rbig_model = RBIG(n_layers=n_layers, rotation_type=rotation_type,
random_state=random_state, zero_tolerance=zero_tolerance)
# transform data
data_trans = rbig_model.fit_transform(data)
Transform Data into Gaussian¶
print(data_trans.shape)
fig, ax = plt.subplots()
ax.scatter(data_trans[:, 0], data_trans[:, 1], s=1)
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_title('Data after RBIG Transformation')
plt.show()
Invertible¶
%%time
# transform data
data_approx = rbig_model.inverse_transform(data_trans)
# check approximation
np.testing.assert_array_almost_equal(data, data_approx)
Check Residuals¶
data_approx = rbig_model.inverse_transform(data_trans)
residual = np.abs(data - data_approx).sum().sum()
print(f'Residual from Original and Transformed: {residual:.2e}')
fig, ax = plt.subplots()
ax.scatter(data_approx[:, 0], data_trans[:, 1], s=1)
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_title('Inverse Transformation')
plt.show()
Information Reduction¶
residual_info = rbig_model.residual_info
fig, ax = plt.subplots()
ax.plot(np.cumsum(rbig_model.residual_info))
ax.set_title('Information Reduction')
plt.show()
Generated Synthetic Data¶
data_synthetic = rng.randn(data.shape[0], data.shape[1])
fig, ax = plt.subplots()
ax.scatter(data_synthetic[:, 0], data_synthetic[:, 1], s=1)
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_title('Synthetically generated factorial gaussian data')
plt.show()
Synthesize New Data from RBIG Model¶
data_original_synthetic = rbig_model.inverse_transform(data_synthetic)
fig, ax = plt.subplots()
ax.scatter(data_original_synthetic[:, 0], data_original_synthetic[:, 1], s=1)
# ax.scatter(data[:, 0], data[:, 1], s=1)
ax.set_ylim([-1.5, 2.0])
ax.set_xlim([0.0, 9.0])
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_title('Synthetically generated data from the input distribution')
plt.show()
Jacobian¶
%%time
jacobian = rbig_model.jacobian(data, return_X_transform=False)
print(jacobian.shape)
print(f"Jacobian - min: {jacobian.min():.3e}, max: {jacobian.max():.3e}")
Estimating Probabilities with RBIG¶
%%time
prob_input, prob_gauss = rbig_model.predict_proba(data, domain='both', n_trials=1)
print(f"Prob Input Domain - min: {prob_input.min():.3e}, max: {prob_input.max():.3e}")
print(f"Prob Gauss Domain - min: {prob_gauss.min():.3e}, max: {prob_gauss.max():.3e}")
print(f"Det:: {rbig_model.det_jacobians:.3e}")
Original Data with Probabilities¶
fig, ax = plt.subplots()
ax.hist(prob_input, 50, facecolor='green', alpha=0.75)
plt.show()
fig, ax = plt.subplots()
h = ax.scatter(data[:, 0], data[:, 1], s=1, c=prob_input, cmap='Reds')
ax.set_xlabel('X')
ax.set_ylabel('Y')
cbar = plt.colorbar(h, )
ax.set_title('Original Data w/ Probabilities')
plt.show()
Probability in Gaussian Domain¶
# Plot the probability of the data in the Gaussian Domain
fig, ax = plt.subplots()
n, bins, patches = ax.hist(prob_gauss, 50, facecolor='green', alpha=0.75)
ax.set_title('Probability in Gaussian domain.')
plt.show()
# Plot the Probabilities of the data using colors
fig, ax = plt.subplots()
g = ax.scatter(data_trans[:, 0], data_trans[:, 1],
s=1, c=prob_gauss)
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_title('Data after RBIG transform w/ Probabilities')
plt.colorbar(g)
plt.show()
Benchmarks¶
data = np.random.randn(100_000, 100)
%%time
n_layers = 1000
rotation_type = 'pca'
random_state = 123
zero_tolerance = 10
# Initialize RBIG class
rbig_model = RBIG(
n_layers=n_layers,
rotation_type=rotation_type,
random_state=random_state,
zero_tolerance=zero_tolerance,
pdf_resolution=50,
)
# transform data
data_trans = rbig_model.fit_transform(data)
rbig_model.n_layers
from rbig.model import RBIG as RBIG11
%%time
n_layers = 1000
rotation_type = 'pca'
random_state = 123
zero_tolerance = 60
verbose=0
method = 'custom'
# Initialize RBIG class
rbig_model = RBIG11(
n_layers=n_layers,
rotation_type=rotation_type,
random_state=random_state,
zero_tolerance=zero_tolerance,
verbose=verbose,
method=method,
pdf_resolution=50,
)
# transform data
data_trans = rbig_model.fit_transform(data)
residual_info = rbig_model.residual_info
plt.
%%time
data_inverted = rbig_model.inverse_transform(data_trans)
%%time
prob_input, prob_gauss = rbig_model.predict_proba(data, domain='both', n_trials=1)
plt.plot(np.cumsum(rbig_model.residual_info))