RBF kernel
This snippet showcases using PyTorch to compute a kernel function. Below is a sample script that evaluates an RBF (Gaussian) kernel, along with its gradients, using PyTorch's automatic differentiation.
from typing import Union
import numpy as np
import torch # GPU + autodiff library
from torch.autograd import grad
class RBF:
    """Radial Basis Function (Gaussian) kernel with PyTorch autodiff support.

    k(x, y) = signal_variance * exp(-0.5 * ||x/l - y/l||^2)

    Hyperparameters are stored as leaf tensors with ``requires_grad=True``
    so gradients can flow back to them if needed.
    """

    def __init__(
        self,
        length_scale: float = 1.0,
        signal_variance: float = 1.0,
        device: Union[str, None] = None,
    ) -> None:
        # Resolve the device BEFORE creating tensors that live on it
        # (the original code read self.device before assigning it).
        if device is None:
            self.device = torch.device("cpu")
        else:
            self.device = device
        # Kernel hyperparameters as differentiable leaf tensors.
        self.length_scale = torch.tensor(
            length_scale, dtype=torch.float32, device=self.device,
            requires_grad=True,
        )
        self.signal_variance = torch.tensor(
            signal_variance, dtype=torch.float32, device=self.device,
            requires_grad=True,
        )

    def _kernel(self, X: torch.Tensor, Y: torch.Tensor) -> torch.Tensor:
        """Kernel matrix as a torch tensor, keeping the autograd graph alive."""
        # Divide by length scale.
        X = torch.div(X, self.length_scale)
        Y = torch.div(Y, self.length_scale)
        # Re-indexing for pairwise broadcasting.
        X_i = X[:, None, :]  # shape (N, D) -> (N, 1, D)
        Y_j = Y[None, :, :]  # shape (M, D) -> (1, M, D)
        # Actual computations.
        sqd = torch.sum((X_i - Y_j) ** 2, 2)  # |X_i - Y_j|^2
        K = torch.exp(-0.5 * sqd)             # Gaussian kernel
        return torch.mul(self.signal_variance, K)  # scale by signal variance

    def __call__(
        self,
        X: np.ndarray,
        Y: Union[np.ndarray, None] = None,
    ) -> np.ndarray:
        """Evaluate the kernel matrix K(X, Y) and return it as a numpy array.

        If ``Y`` is None the Gram matrix K(X, X) is computed.
        """
        X = torch.tensor(X, dtype=torch.float32, device=self.device)
        if Y is None:
            Y = X
        else:
            Y = torch.tensor(Y, dtype=torch.float32, device=self.device)
        K = self._kernel(X, Y)
        # Tensors have no .to_numpy(); the correct call is .numpy().
        # Move to CPU first in case self.device is a GPU.
        return K.detach().cpu().numpy()

    def gradient_X(self, X):
        # Not implemented in the original snippet; kept for interface parity.
        return None

    def gradient_X2(self, X):
        # Not implemented in the original snippet; kept for interface parity.
        return None

    def gradient_XX(
        self,
        X: np.ndarray,
        Y: Union[np.ndarray, None] = None,
    ) -> np.ndarray:
        """Gradient of sum(K(X, Y)) w.r.t. X, returned as an array shaped like X.

        NOTE(review): the original body tensorized the undefined name
        ``length_scale`` and called ``grad`` on a detached numpy result;
        this version keeps the graph via ``_kernel`` and differentiates a
        scalar reduction, which is what ``torch.autograd.grad`` requires.
        """
        # Convert to a tensor that requires grad.
        X_t = torch.tensor(
            X, dtype=torch.float32, device=self.device, requires_grad=True,
        )
        if Y is None:
            # Aliasing Y to X means the gradient accounts for both arguments.
            Y_t = X_t
        else:
            Y_t = torch.tensor(
                Y, dtype=torch.float32, device=self.device, requires_grad=True,
            )
        K = self._kernel(X_t, Y_t)
        # grad needs a scalar output and explicit inputs.
        (J,) = grad(K.sum(), X_t)
        return J.detach().cpu().numpy()

    def gradient_XX2(self, X, Y=None):
        # Not implemented in the original snippet; kept for interface parity.
        return None
Below we can see how one would actually do this in practice.
# With PyTorch, using the GPU is simple
use_gpu = torch.cuda.is_available()  # fixed typo: was "use_gpy", read as "use_gpu" below
dtype = torch.cuda.FloatTensor if use_gpu else torch.FloatTensor

N = 5000  # cloud of 5,000 points
D = 3     # 3D
q = np.random.rand(N, D)
p = np.random.rand(N, D)
s = 1.

# Store arbitrary arrays on the CPU or GPU:
q = torch.from_numpy(q).type(dtype)
p = torch.from_numpy(p).type(dtype)
s = torch.Tensor([1.]).type(dtype)

# Tell PyTorch to track the variables "q" and "p"
q.requires_grad = True
p.requires_grad = True

# Rescale with length_scale
# NOTE(review): q is divided by s here AND sqd is divided by s**2 below —
# the length scale is applied twice. Harmless only because s = 1; confirm
# the intended parameterization before changing s.
q = torch.div(q, s)

# Re-indexing
q_i = q[:, None, :]  # shape (N, D) -> (N, 1, D)
q_j = q[None, :, :]  # shape (N, D) -> (1, N, D)

# Actual Computations
sqd = torch.sum((q_i - q_j) ** 2, 2)  # |q_i - q_j|^2
K_qq = torch.exp(-sqd / s ** 2)       # Gaussian Kernel
v = K_qq @ p  # matrix mult. (N,N) @ (N,D) = (N,D)

# Hamiltonian H(q,p): .5*<p,v> — must be defined BEFORE differentiating it
# (the original called grad(H, ...) two lines before H existed).
H = .5 * torch.dot(p.view(-1), v.view(-1))

# Automatic Differentiation
[dq, dp] = grad(H, [q, p])
Source: a presentation on automatic differentiation (autograd) and its underlying mathematics.