Refactoring¶
Model (and Parameters)¶
Old Way
import math
import torch
from torch import nn
# dimensions for parameters
input_dim = 8
output_dim = 1
n_samples = x_train.shape[0]
# weight 'matrix'
weights = nn.Parameter(
    torch.randn(input_dim, output_dim) / math.sqrt(input_dim),
    requires_grad=True
)
# bias vector
bias = nn.Parameter(
    torch.zeros(output_dim),
    requires_grad=True
)
# define model
def model(x_batch: torch.Tensor):
    return x_batch @ weights + bias
# set linear model
lr_model = model
class LinearModel(nn.Module):
    """A linear model.

    Parameters
    ----------
    input_dim : int
        The input dimension for the linear model
        (# input features)
    output_dim : int
        The output dimension for the linear model
        (# outputs)

    Attributes
    ----------
    weights : torch.Tensor (input_dim x output_dim)
        The parameter for the linear model weights
    bias : torch.Tensor (output_dim)
        The parameter for the linear model bias

    Methods
    -------
    forward : torch.Tensor (batch_size x output_dim)
        The forward pass through the linear model
    """
    def __init__(self, input_dim: int, output_dim: int):
        super().__init__()
        # weight 'matrix'
        self.weights = nn.Parameter(
            torch.randn(input_dim, output_dim) / math.sqrt(input_dim),
            requires_grad=True
        )
        # bias vector
        self.bias = nn.Parameter(
            torch.zeros(output_dim),
            requires_grad=True
        )

    def forward(self, x_batch: torch.Tensor):
        return x_batch @ self.weights + self.bias
input_dim = x_train.shape[1]
output_dim = y_train.shape[1]
lr_model = LinearModel(input_dim, output_dim)
The weights and bias now live inside the model, and we have the forward method, which allows us to perform the linear operation on a batch.
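As a quick sanity check (not part of the original code; the dummy batch below is made up), nn.Module registers the parameters for us, and calling the model invokes forward:
# minimal sketch: inspect the registered parameters and call the model
# (x_dummy is an illustrative batch; the printed output shape assumes output_dim = 1 as above)
for name, param in lr_model.named_parameters():
    print(name, param.shape)
x_dummy = torch.randn(4, input_dim)
print(lr_model(x_dummy).shape)  # e.g. torch.Size([4, 1])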
Loss Function¶
We can also use the built-in loss functions. The MSE is a very common loss function, so it is available within the library. In PyTorch, we can use the nn.functional.mse_loss function or the nn.MSELoss class. The latter has more options because it is a class rather than a plain function, but the former will do for now. So we can change the old way:
def mse_loss(input: torch.Tensor, target: torch.Tensor):
    return torch.mean((input - target) ** 2)
to a simplified version.
import torch.nn.functional as F
# set loss function to mse
loss_func = F.mse_loss
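As a small sketch (the dummy inputs and targets below are assumptions for illustration only), we can confirm that the built-in loss matches the hand-written version, and that the class-based nn.MSELoss gives the same value:
# illustrative check with made-up tensors
inputs = torch.randn(5, 1)
targets = torch.randn(5, 1)
print(torch.allclose(F.mse_loss(inputs, targets), torch.mean((inputs - targets) ** 2)))  # True
print(torch.allclose(nn.MSELoss()(inputs, targets), F.mse_loss(inputs, targets)))        # True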
Optimizer¶
Another refactoring opportunity is to use a built-in optimizer. I don't want to have to update each weight by hand with its gradient multiplied by the learning rate.
from torch import optim
learning_rate = 0.01
# use stochastic gradient descent
opt = optim.SGD(lr_model.parameters(), lr=learning_rate)
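For comparison, here is roughly the manual update that opt.step() and opt.zero_grad() replace. This is a sketch of plain gradient descent, not the exact internals of optim.SGD:
# manual parameter update, roughly what opt.step() + opt.zero_grad() do for plain SGD
# (assumes loss.backward() has already filled param.grad)
with torch.no_grad():
    for param in lr_model.parameters():
        param -= learning_rate * param.grad
        param.grad.zero_()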
Training¶
So after all of that hard work, the training procedure looks a lot cleaner because we have encapsulated many of the steps with built-in components. Now we can focus on other things.
batch_size = 100
epochs = 10
n_samples = x_train.shape[0]
n_batches = (n_samples - 1) // batch_size + 1
losses = []
with tqdm.trange(epochs) as bar:
    # Loop through epochs with tqdm bar
    for iepoch in bar:
        # Loop through batches
        for idx in range(n_batches):
            # get indices for batches
            start_idx = idx * batch_size
            end_idx = start_idx + batch_size
            xbatch = x_train[start_idx:end_idx]
            ybatch = y_train[start_idx:end_idx]
            # predictions
            ypred = lr_model(xbatch)
            # loss
            loss = loss_func(ypred, ybatch)
            # add running loss
            losses.append(loss.item())
            # Loss back propagation
            loss.backward()
            # optimize weights
            opt.step()
            opt.zero_grad()
        postfix = dict(
            Epoch=f"{iepoch+1}",
            Loss=f"{loss.item():.3f}",
        )
        bar.set_postfix(postfix)
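Since we kept a running list of per-batch losses, a quick way to see whether training is behaving is to plot them. This is a sketch and assumes matplotlib is installed, which the original does not mention:
import matplotlib.pyplot as plt

# plot the per-batch training loss collected above
plt.plot(losses)
plt.xlabel("Batch iteration")
plt.ylabel("MSE loss")
plt.show()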
Datasets and DataLoaders¶
Now there are some extra things we can do to reduce the amount of code and make it neater. We can use Datasets and DataLoaders.
Dataset¶
In PyTorch, the Dataset helps us index and slice through our data. It can also combine the inputs and outputs so that we only have to slice through a single dataset, and a custom Dataset can even convert an np.ndarray to a Tensor automatically. So instead of slicing x_train and y_train separately, we can wrap them in a TensorDataset and slice both at once:
from torch.utils.data import TensorDataset
train_ds = TensorDataset(x_train, y_train)
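# (illustrative, not in the original) indexing the TensorDataset returns an (x, y) pair,
# and slicing returns a batch of both at once
xb, yb = train_ds[0:5]
print(xb.shape, yb.shape)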
lr_model, opt = get_lr_model()
batch_size = 100
epochs = 10
n_samples = x_train.shape[0]
n_batches = (n_samples - 1) // batch_size + 1
losses = []
with tqdm.trange(epochs) as bar:
    # Loop through epochs with tqdm bar
    for iepoch in bar:
        # Loop through batches
        for idx in range(n_batches):
            # get indices for batches
            start_idx = idx * batch_size
            end_idx = start_idx + batch_size
            # Use the Dataset to get both batches at once
            xbatch, ybatch = train_ds[start_idx:end_idx]
            # predictions
            ypred = lr_model(xbatch)
            # loss
            loss = loss_func(ypred, ybatch)
            # add running loss
            losses.append(loss.item())
            # Loss back propagation
            loss.backward()
            # optimize weights
            opt.step()
            opt.zero_grad()
        postfix = dict(
            Epoch=f"{iepoch+1}",
            Loss=f"{loss.item():.3f}",
        )
        bar.set_postfix(postfix)
DataLoader¶
The DataLoader takes care of batching for us: it iterates over the Dataset in batches, so we no longer need to compute the start and end indices ourselves.
from torch.utils.data import TensorDataset, DataLoader
batch_size = 100
train_ds = TensorDataset(x_train, y_train)
train_dl = DataLoader(train_ds, batch_size=batch_size)
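# (illustrative, not in the original) each iteration over the DataLoader yields one (xbatch, ybatch) pair
xb, yb = next(iter(train_dl))
print(xb.shape, yb.shape)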
# initialize model
lr_model, opt = get_lr_model()
epochs = 10
losses = []
with tqdm.trange(epochs) as bar:
    # Loop through epochs with tqdm bar
    for iepoch in bar:
        # Loop through batches
        for xbatch, ybatch in train_dl:
            # predictions
            ypred = lr_model(xbatch)
            # loss
            loss = loss_func(ypred, ybatch)
            # add running loss
            losses.append(loss.item())
            # Loss back propagation
            loss.backward()
            # optimize weights
            opt.step()
            opt.zero_grad()
        postfix = dict(
            Epoch=f"{iepoch+1}",
            Loss=f"{loss.item():.3f}",
        )
        bar.set_postfix(postfix)
Validation set¶
So because it's so easy, we can now add that validation set. I would have dreaded doing that before due to the lengthy code. But now, it's a piece of cake.
from torch.utils.data import TensorDataset, DataLoader
# training set
batch_size = 100
train_ds = TensorDataset(x_train, y_train)
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
# validation set
valid_ds = TensorDataset(x_valid, y_valid)
valid_dl = DataLoader(valid_ds, batch_size=batch_size)
# initialize model
lr_model, opt = get_lr_model()
epochs = 10
train_losses, valid_losses = [], []
with tqdm.trange(epochs) as bar:
    # Loop through epochs with tqdm bar
    for iepoch in bar:
        # put in training mode
        lr_model.train()
        # Loop through batches
        for xbatch, ybatch in train_dl:
            # predictions
            ypred = lr_model(xbatch)
            # loss
            loss = loss_func(ypred, ybatch)
            # add running loss
            train_losses.append(loss.item())
            # Loss back propagation
            loss.backward()
            # optimize weights
            opt.step()
            opt.zero_grad()
        postfix = dict(
            Epoch=f"{iepoch+1}",
            Loss=f"{loss.item():.3f}",
        )
        bar.set_postfix(postfix)
        # put in evaluation mode
        lr_model.eval()
        with torch.no_grad():
            for xbatch, ybatch in valid_dl:
                loss = loss_func(lr_model(xbatch), ybatch)
                valid_losses.append(loss.item())
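If a single number per epoch is more useful than the per-batch list, one option (a sketch, not part of the original code) is to average the validation loss over the whole validation set, weighting each batch by its size:
# (sketch) epoch-level validation loss, weighted by batch size
lr_model.eval()
with torch.no_grad():
    valid_loss = sum(
        loss_func(lr_model(xb), yb).item() * len(xb) for xb, yb in valid_dl
    ) / len(valid_ds)
print(f"Validation MSE: {valid_loss:.3f}")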
Appendix¶
Python Concepts¶
Python Classes
More information can be found here.
Comments
Do them. Always. They might seem like an upfront cost, but they will save you time in the end.