1.1 - Univariate Gaussianization#

!pip install "git+https://github.com/IPL-UV/rbig.git"
import sys, os
from pyprojroot import here

# search upward to find the project root (marked by the .here file)
root = here(project_files=[".here"])

# append to path
sys.path.append(str(root))

%load_ext autoreload
%autoreload 2

Import Packages#

import numpy as np
from sklearn.datasets import make_moons

# Plotting
import matplotlib.pyplot as plt
import corner

# Logging
import wandb

# Reproducibility
import pytorch_lightning as pl

pl.seed_everything(1234)

Logging#

# TODO

Data#

ntrain = 10_000
ntest = 10_000
noise = 0.1
random_state = 123
train_data = make_moons(n_samples=ntrain, noise=noise, random_state=random_state)[0]
test_data = make_moons(n_samples=ntest, noise=noise, random_state=random_state * 10)[0]

Plot Data#

fig = plt.figure(figsize=(7, 7))

corner.corner(train_data, color="blue", fig=fig)

plt.show()

Flow Transformation#

Univariate Gaussianization#

Uniformization#
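
The first stage maps each marginal to the unit interval through its CDF. By the probability integral transform, for a continuous random variable $x$ with CDF $F$,

$$u = F(x), \qquad u \sim \mathcal{U}(0, 1).$$

Here $F$ is estimated with a histogram, so the transform is piecewise linear and cheap to invert.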

X_1 = train_data[:, 0][:, None]
X_2 = train_data[:, 1][:, None]

fig = corner.corner(X_1)

# reference implementation from the rbig package
from rbig._src.uniform import MarginalHistogramUniformization

hist_uni_bijector = MarginalHistogramUniformization(X_1)

# re-implemented from scratch below
from typing import NamedTuple, Callable, Union
from scipy import stats


class UnivariateHistogram:
    """Marginal uniformization via the empirical (histogram) CDF."""

    def __init__(
        self,
        X: np.ndarray,
        bins: Union[int, str] = 10,
        alpha: float = 1e-10,
        bound_ext: float = 0.1,
    ):
        # extend the support slightly beyond the observed range
        diff = X.max() - X.min()
        lower_bound = X.min() - bound_ext * diff
        upper_bound = X.max() + bound_ext * diff

        # create histogram
        hist = np.histogram(X, bins=bins, range=(lower_bound, upper_bound))

        # create a distribution from the histogram
        estimator = stats.rv_histogram(hist)

        # regularize so the pdf is strictly positive on the support
        estimator._hpdf += alpha

        self.estimator = estimator

    def forward(self, X):
        # uniformization: u = F(x)
        return self.estimator.cdf(X)

    def inverse(self, X):
        # inverse uniformization: x = F^{-1}(u)
        return self.estimator.ppf(X)

    def gradient(self, X):
        # derivative of the forward map: dF/dx = p(x)
        return self.estimator.pdf(X)


hist_uni_bijector = UnivariateHistogram(X_1, bins="scott")
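
As a quick sanity check (a sketch using only the class above), the inverse should undo the forward up to the histogram's discretization error:

# round-trip check: x ≈ F^{-1}(F(x)) within the histogram's resolution
X_1_roundtrip = hist_uni_bijector.inverse(hist_uni_bijector.forward(X_1))
print(np.abs(X_1 - X_1_roundtrip).max())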

Forward Transformation#

Xu_1 = hist_uni_bijector.forward(X_1)
fig = corner.corner(Xu_1, color="red")

Inverse Transformation#

X_1_approx = hist_uni_bijector.inverse(Xu_1)
fig = corner.corner(X_1_approx, color="green")

Det Jacobian#
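
For the histogram model, the derivative of the forward (CDF) map is simply the histogram density,

$$\frac{\partial F(x)}{\partial x} = \hat{p}(x),$$

which is exactly what the `gradient` method returns.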

Xdj_1 = hist_uni_bijector.gradient(X_1)
fig = corner.corner(Xdj_1)

Density Estimation#
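
The density follows from the change-of-variables formula. With $u = F(x)$ and a uniform base density $p_u(u) = 1$ on $[0, 1]$,

$$p_x(x) = p_u\big(F(x)\big)\,\left|\frac{\partial F(x)}{\partial x}\right| = \hat{p}(x).$$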

base_dist = stats.uniform()

# latent probability
pz = base_dist.pdf(Xu_1)

# det jacobian
det_jacobian = hist_uni_bijector.gradient(X_1)

# total probability via the change of variables
p_x = (pz * det_jacobian).squeeze()

fig, ax = plt.subplots()
ax.hist(p_x, bins=25)
plt.show()

Sampling#
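
Sampling runs the flow backwards (inverse transform sampling): draw $u \sim \mathcal{U}(0, 1)$ and push it through the inverse CDF,

$$x = F^{-1}(u).$$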

# generate samples in uniform domain
pz_samples = base_dist.rvs(size=10_000)[:, None]

# propagate them through
X_1_samples = hist_uni_bijector.inverse(pz_samples)
fig, ax = plt.subplots()
ax.hist(X_1, bins=25, color="blue", density=True, label="True")
ax.hist(X_1_samples, bins=25, color="green", density=True, label="Generated")
plt.legend()
plt.tight_layout()
plt.show()

Inverse CDF#
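
The second stage maps the uniformized variable to a standard Gaussian via the inverse Gaussian CDF (the probit function),

$$z = \Phi^{-1}(u), \qquad z \sim \mathcal{N}(0, 1).$$

The `eps` parameter clips $u$ away from $\{0, 1\}$, where $\Phi^{-1}$ diverges.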

class InverseGaussCDF:
    def __init__(self, eps: float = 1e-5):
        self.eps = eps

        # standard Gaussian base distribution
        self.estimator = stats.norm(loc=0, scale=1)

    def forward(self, X):
        # clip away from {0, 1} to keep the ppf finite
        X = np.clip(X, self.eps, 1 - self.eps)

        Z = self.estimator.ppf(X)

        return Z

    def inverse(self, Z):
        X = self.estimator.cdf(Z)

        return X

    def gradient(self, X):
        # derivative of the forward map: d/du Phi^{-1}(u) = 1 / phi(Phi^{-1}(u))
        Z = self.forward(X)

        det_jacobian = 1 / self.estimator.pdf(Z)

        return det_jacobian


class Bijector(NamedTuple):
    forward: Callable
    inverse: Callable
    gradient: Callable


def inverse_cdf(eps: float = 1e-5):
    estimator = stats.norm(loc=0, scale=1)

    def forward(X):
        X = np.clip(X, eps, 1 - eps)
        return estimator.ppf(X)

    def inverse(Z):
        return estimator.cdf(Z)

    def gradient(X):
        X = np.clip(X, eps, 1 - eps)
        return 1 / estimator.pdf(estimator.ppf(X))

    return Bijector(forward=forward, inverse=inverse, gradient=gradient)

# reference implementation from the rbig package
from rbig._src.invcdf import InverseGaussCDF

invcdf_bijector = InverseGaussCDF(eps=1e-7)
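
A quick check (a sketch, using only the forward method that is exercised below): the uniform median should land on the Gaussian mean.

# sanity check: Phi^{-1}(0.5) = 0
print(invcdf_bijector.forward(np.array([[0.5]])))  # expect ≈ [[0.]]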

Forward#

Xg_1 = invcdf_bijector.forward(Xu_1)

Inverse#

Xu_1_approx = invcdf_bijector.inverse(Xg_1)
fig = corner.corner(Xu_1_approx)
fig, ax = plt.subplots()
ax.hist(Xu_1, bins=25, color="blue", density=True, label="True")
ax.hist(Xu_1_approx, bins=25, color="green", density=True, label="Inverse")
plt.legend()
plt.tight_layout()
plt.show()

Gradient#
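
By the inverse function theorem, the derivative of the probit is

$$\frac{d}{du}\,\Phi^{-1}(u) = \frac{1}{\varphi\big(\Phi^{-1}(u)\big)},$$

where $\varphi$ is the standard Gaussian pdf; it blows up near $u \in \{0, 1\}$, which is why the clipping matters.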

Xg_1_gradient = invcdf_bijector.gradient(Xu_1)
fig = corner.corner(Xg_1_gradient)

Density Estimation#
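
Composing both stages, $z = \Phi^{-1}(F(x))$, the chain rule gives

$$p_x(x) = p_z(z)\,\left|\frac{dz}{du}\right|\,\left|\frac{du}{dx}\right| = \varphi(z)\cdot\frac{1}{\varphi(z)}\cdot\hat{p}(x) = \hat{p}(x),$$

so in the univariate case the Gaussianization step leaves the modeled density unchanged; its role is to produce a Gaussian latent space.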

# 1) uniformization
Xu_1 = hist_uni_bijector.forward(X_1)
det_jacobian_u = hist_uni_bijector.gradient(X_1)

# 2) gaussianization
Xg_1 = invcdf_bijector.forward(Xu_1)
det_jacobian_g = invcdf_bijector.gradient(Xu_1)

# latent probability
base_dist = stats.norm(loc=0, scale=1)

pz = base_dist.pdf(Xg_1)

# total probability (chain rule over both transforms)
p_x = pz * det_jacobian_g * det_jacobian_u

fig = corner.corner(p_x)

Sampling#
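
To sample from the full flow, invert the composition: draw $z \sim \mathcal{N}(0, 1)$, map to the uniform domain with $\Phi$, then apply the inverse histogram CDF,

$$x = F^{-1}\big(\Phi(z)\big).$$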

# 1) samples from the latent (Gaussian) distribution
base_dist = stats.norm(loc=0, scale=1)
pz_samples = base_dist.rvs(size=10_000)[:, None]

# 2) inverse gaussianization
Xu_1_samples = invcdf_bijector.inverse(pz_samples)

# 3) inverse uniformization
X_1_samples = hist_uni_bijector.inverse(Xu_1_samples)
fig, ax = plt.subplots()
ax.hist(X_1, bins=25, color="blue", density=True, label="True")
ax.hist(X_1_samples, bins=25, color="green", density=True, label="Samples")
plt.legend()
plt.tight_layout()
plt.show()
