Simulated Distributions¶
In this notebook, I walk through some of the distributions we used to generate synthetic data.
import sys, os
cwd = os.getcwd()
sys.path.insert(0, f'{cwd}/../../src')
sys.path.insert(0, f'{cwd}/../../src/itetoolbox')
import numpy as np
import ite
from sklearn.utils import check_random_state
from data.toy import entropy_marginal
%matplotlib inline
%load_ext autoreload
%autoreload 2
Analytical Values¶
Uniform Distribution¶
The uniform distribution has the probability density function
$$f(x) = \frac{1}{b - a}, \qquad a \leq x \leq b$$
where a,b are the support for the distribution.
We can measure the entropy as:
$$H(X) = \log (b - a)$$
Additionally, if we want to measure the entropy of x after it is transformed by some random matrix A, then we have:
$$H(Ax) = H(x) + \log |A|$$
where |\cdot| is the determinant operator.
Multivariate Uniform Distribution
Note: I saw in the code that they take the product of the support widths and then take the log. Since every dimension is independent, this is equivalent to summing the per-dimension log-widths:
$$H(X) = \log \prod_{i=1}^{d} (b_i - a_i) = \sum_{i=1}^{d} \log (b_i - a_i)$$
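A quick numerical check of that identity (a small addition; the widths below are arbitrary, hypothetical values):
# log of the product equals the sum of the logs (per-dimension support widths)
widths = np.array([0.5, 1.2, 2.0])
print(np.log(np.prod(widths)), np.sum(np.log(widths)))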
from scipy import stats

# 1D uniform distribution on [a, b]
a = -1
b = 1
loc = a
scale = b - a

# draw a few samples and compute the analytical entropy
uni_var = stats.uniform.rvs(loc=loc, scale=scale, size=(10, 2), random_state=123)
stats.uniform.entropy(loc=a, scale=(b - a))
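As a quick check (not in the original), the scipy value above should equal the closed form \log(b - a), using the a, b defined in the previous cell:
# closed-form entropy of U(a, b)
print(np.log(b - a))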
from scipy import stats
np.random.seed(123)

d_dimensions = 2

# per-dimension support [a_i, b_i]
support_a = -np.random.rand(1, d_dimensions)
support_b = np.random.rand(1, d_dimensions)

# random transformation matrix (uniform random entries)
A = np.random.rand(d_dimensions, d_dimensions)

# entropy of the multivariate uniform: log of the product of the support widths
H_uni = np.log(np.prod(support_b - support_a))

# entropy contribution of the linear transformation: log|det(A)|
H_A = np.linalg.slogdet(A)[1]

print(H_uni)
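Following the relationship above, the entropy of the transformed uniform data is the sum of the two terms (a small addition, using H_uni and H_A from the cell above):
# entropy of the linearly transformed uniform variable: H(Ax) = H(x) + log|det(A)|
print(H_uni + H_A)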
Gaussian Distribution¶
# 2D Gaussian with strongly correlated dimensions
cov = np.array([[1, 0.9], [0.9, 1]])
print(cov.shape)

mu = [0.0, 0.0]
seed = 123
n_samples = 100
d_dimensions = 2

# frozen multivariate normal distribution
norm_dist = stats.multivariate_normal(mean=mu, cov=cov, seed=seed)
# norm_dist.entropy()
norm_dist
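As a quick check (not in the original notebook), the frozen distribution's entropy should match the closed form $$H(X) = \frac{1}{2} \log \left[ (2 \pi e)^{d} |\Sigma| \right]$$ using the cov defined above:
# closed-form entropy of a multivariate Gaussian: 0.5 * log((2*pi*e)^d * |Sigma|)
d = cov.shape[0]
H_gauss = 0.5 * (d * np.log(2 * np.pi * np.e) + np.linalg.slogdet(cov)[1])
print(H_gauss)
print(norm_dist.entropy())  # should agree with the closed form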
Dirichlet Distribution¶
# concentration parameters (interpreted here as a 3-dimensional Dirichlet)
alpha = np.array([0.1, 0.1, 0.1])
seed = 123

# draw 3 samples from the Dirichlet distribution
diri_dist = stats.dirichlet.rvs(alpha=alpha, size=3, random_state=seed)
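For reference (a small addition, relying on the alpha vector defined above), scipy also provides the closed-form Dirichlet entropy:
# analytical entropy of the Dirichlet with concentration vector alpha
print(stats.dirichlet.entropy(alpha))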
T-Student Distribution¶
Multivariate Distribution¶
$$\frac{\Gamma \left[ \frac{(\nu + p)}{2} \right]} {\Gamma \left(\frac{\nu}{2} \right)\nu^{\frac{p}{2}} \pi^{\frac{p}{2}} \left|\Sigma \right|^{\frac{1}{2}}} \left[ 1 + \frac{1}{\nu} (x - \mu)^\top \Sigma^{-1}(x - \mu) \right]^{- \frac{(\nu + p)}{2}} $$
Entropy
The differential entropy of the multivariate Student-t distribution when the covariance matrix is the identity is:
$$H(x) = -\log \left[ \frac{\Gamma \left( \frac{\nu + p}{2} \right)}{\Gamma \left( \frac{\nu}{2} \right) (\nu \pi)^{\frac{p}{2}}} \right] + \frac{\nu + p}{2} \left[ \Psi \left( \frac{\nu + p}{2} \right) - \Psi \left( \frac{\nu}{2} \right) \right]$$
where:
- \Psi is the digamma function
- \Gamma is the gamma function
- \nu is the degrees of freedom and p is the dimensionality
If we have the case where we have a covariance matrix \Sigma, we can use the relationship:
$$y = \mu + L x$$
where:
- x is the standard Student-t random vector
- \mu is the mean
- \Sigma=LL^\top is the covariance matrix
The properties of differential entropy under an affine transformation yield the following equation:
$$H(Ax + b) = H(x) + \log |A|$$
So we can write the entropy of the Student-t distribution with mean \mu and covariance matrix \Sigma as the sum of the entropy of the original distribution with covariance I and the change-of-variables term:
$$H(y) = H(x) + \log |L| = H(x) + \frac{1}{2} \log |\Sigma|$$
Source
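To make the formulas above concrete, here is a minimal sketch of the entropy computation in Python. The function name studentt_entropy is mine (not part of the notebook); it assumes \Sigma = I when no covariance is passed and adds the \frac{1}{2}\log|\Sigma| correction otherwise.
from scipy.special import digamma, gammaln

def studentt_entropy(nu, p, cov=None):
    """Differential entropy of a multivariate Student-t (sketch)."""
    # identity-covariance part of the formula above
    H = (
        -gammaln((nu + p) / 2)
        + gammaln(nu / 2)
        + (p / 2) * np.log(nu * np.pi)
        + ((nu + p) / 2) * (digamma((nu + p) / 2) - digamma(nu / 2))
    )
    # change-of-variables correction: 0.5 * log|Sigma|
    if cov is not None:
        H += 0.5 * np.linalg.slogdet(cov)[1]
    return H

print(studentt_entropy(nu=3, p=2))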
Distribution Class Generator¶
class DistData:
    def __init__(
        self,
        n_samples: int = 1000,
        d_dimensions: int = 3,
        distribution: str = "gauss",
        mu: float = 0.0,
        sigma: float = 1.0,
        weight: float = 2.0,
        bias: float = 0.5,
        nu: float = 1.0,
        gauss_state: int = 123,
        dim_state: int = 111,
        trans_state: int = 123,
    ) -> None:
        self.n_samples = n_samples
        self.d_dimensions = d_dimensions
        self.distribution = distribution
        self.mu = mu
        self.sigma = sigma
        self.weight = weight
        self.bias = bias
        self.nu = nu
        self.gauss = check_random_state(gauss_state)
        self.dim_state = check_random_state(dim_state)
        self.trans_state = check_random_state(trans_state)

    def data(self):
        if self.distribution == "gauss":
            # generate Gaussian data
            self.samples = self.mu + self.sigma * self.gauss.randn(self.n_samples, self.d_dimensions)
            # random linear transformation (uniformly distributed entries)
            self.A = self.trans_state.rand(self.d_dimensions, self.d_dimensions)
            # output data
            self.X = self.samples @ self.A
        elif self.distribution == "linear":
            # generate data from a normal distribution
            self.samples = self.mu + self.sigma * self.gauss.randn(self.n_samples, self.d_dimensions)
            # random per-dimension exponents (normally distributed)
            d_rot = self.dim_state.randn(1, self.d_dimensions)
            # sign-preserving power transformation on each dimension
            for idim in range(self.d_dimensions):
                exponent = self.weight * d_rot[:, idim] + self.bias
                self.samples[:, idim] = np.sign(self.samples[:, idim]) * np.abs(self.samples[:, idim]) ** exponent
            # random linear transformation (uniformly distributed entries)
            self.A = self.trans_state.rand(self.d_dimensions, self.d_dimensions)
            self.X = self.samples @ self.A
        else:
            raise ValueError("Unrecognized distribution...")
        return self.X

    def entropy(self):
        if self.distribution == "gauss":
            # sum of marginal entropies plus the log-determinant of the transformation
            return entropy_marginal(self.X).sum() + np.linalg.slogdet(self.A)[1]
        elif self.distribution == "linear":
            # same decomposition for the rotated linear dataset
            return entropy_marginal(self.X).sum() + np.linalg.slogdet(self.A)[1]
        else:
            raise ValueError("Unrecognized distribution...")
Distribution I - Rotated Gaussian Dataset¶
random_state = 123
mu = 0.0
sigma = 1.0
n_samples = 10000
d_dimensions = 100
distribution = 'gauss'
# initialize class
clf_datadist = DistData(
    n_samples=n_samples,
    mu=mu,
    sigma=sigma,
    d_dimensions=d_dimensions,
    distribution=distribution,
)
# generate samples
X = clf_datadist.data()
# calculate entropy
H_x = clf_datadist.entropy()
print(f"Entropy: {H_x:.4f}")
Distribution II - Rotated Linear Dataset¶
random_state = 123
mu = 0.0
sigma = 1.0
n_samples = 10000
d_dimensions = 100
distribution = 'linear'
# initialize class
clf_datadist = DistData(
    n_samples=n_samples,
    mu=mu,
    sigma=sigma,
    d_dimensions=d_dimensions,
    distribution=distribution,
)
# generate samples
X = clf_datadist.data()
# calculate entropy
H_x = clf_datadist.entropy()
print(f"Entropy: {H_x:.4f}")