PyTorch_CrashCourse
December 9, 2024
0.1 1. Tensors
Everything in PyTorch is based on Tensor operations. A Tensor is a multi-dimensional matrix
containing elements of a single data type:
[1]: import torch
# torch.empty(size): uninitialized (arbitrary memory contents)
x = torch.empty(1) # scalar
print("empty(1):", x)
x = torch.empty(3) # vector
print("empty(3):",x)
x = torch.empty(2, 3) # matrix
print("empty(2,3):",x)
x = torch.empty(2, 2, 3) # tensor, 3 dimensions
#x = torch.empty(2,2,2,3) # tensor, 4 dimensions
print("empty(2, 2, 3):",x)
empty(1): tensor([3.3631e-44])
empty(3): tensor([-3.1612e-12, 3.0753e-41, 6.4666e-37])
empty(2,3): tensor([[-7.5699e+33, 3.0760e-41, -7.5689e+33],
[ 3.0760e-41, 1.4013e-45, 1.3873e-43]])
empty(2, 2, 3): tensor([[[-3.1650e-12, 3.0753e-41, 0.0000e+00],
[ 1.4013e-45, 0.0000e+00, 0.0000e+00]],
        ...]])

[ ]: # torch.rand(size): random numbers in [0, 1)
x = torch.rand(5, 3)
print("rand(5,3):", x)

# torch.zeros(size): filled with 0
x = torch.zeros(5, 3)
print("zeros(5,3):", x)

rand(5,3): tensor([[...],
        [0.0887, 0.0514, 0.7282],
        [0.3310, 0.5505, 0.9558],
        [0.5535, 0.4563, 0.0331],
        [0.3945, 0.9151, 0.9658]])
zeros(5,3): tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
[ ]: # check the data type (default is torch.float32)
print(x.dtype)
[ ]: x = torch.rand(2, 2)
y = torch.rand(2, 2)

# elementwise addition
z = x + y
# torch.add(x, y) is equivalent
print(x)
print(y)
print(z)
[ ]: # subtraction
z = x - y
z = torch.sub(x, y)
# multiplication
z = x * y
z = torch.mul(x,y)
# division
z = x / y
z = torch.div(x,y)
[ ]: # Slicing
x = torch.rand(5,3)
print(x)
print("x[:, 0]", x[:, 0]) # all rows, column 0
print("x[1, :]", x[1, :]) # row 1, all columns
print("x[1, 1]", x[1,1]) # element at 1, 1
NumPy Converting a Torch Tensor to a NumPy array and vice versa is very easy
[6]: a = torch.ones(5)
print(a)
b = a.numpy()   # torch tensor -> numpy array (shares the same memory on CPU)

[ ]: # numpy array -> torch tensor
import numpy as np
a = np.ones(5)
b = torch.from_numpy(a)   # shares memory with a
c = torch.tensor(a)       # makes a copy
print(a)
print(b)
print(c)

a += 1   # modifying a in place also changes b, but not the copy c
print(a)
print(b)
print(c)

[1. 1. 1. 1. 1.]
tensor([1., 1., 1., 1., 1.], dtype=torch.float64)
tensor([1., 1., 1., 1., 1.], dtype=torch.float64)
[2. 2. 2. 2. 2.]
tensor([2., 2., 2., 2., 2.], dtype=torch.float64)
tensor([1., 1., 1., 1., 1.], dtype=torch.float64)
GPU Support By default all tensors are created on the CPU. But we can also move them to
the GPU (if one is available), or create them directly on the GPU.
[ ]: device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
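The cell that actually moves or creates tensors on the GPU did not survive the export; a minimal sketch using the device object above (the names x, y, z are illustrative) would be:

[ ]: x = torch.rand(2, 2).to(device)       # move an existing tensor to the GPU (if available)
y = torch.rand(2, 2, device=device)        # or create it directly on that device
z = x + y                                  # the result stays on the same device
print(z.device)
z = z.to("cpu")                            # move back to the CPU, e.g. before converting to numpy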
0.2 2. Autograd
The autograd package provides automatic differentiation for all operations on Tensors. Generally
speaking, torch.autograd is an engine for computing the vector-Jacobian product: it computes
partial derivatives while applying the chain rule.
Set requires_grad = True:

[ ]: # requires_grad=True -> PyTorch tracks all operations on this tensor
x = torch.randn(3, requires_grad=True)
y = x + 2
z = (y * y * 3).mean()

# The gradient for this tensor will be accumulated into the .grad attribute.
# It is the partial derivative of the function w.r.t. the tensor.
print(x.grad)

z.backward()
print(x.grad)  # dz/dx
# !!! Careful !!! backward() accumulates the gradient for this tensor into the .grad attribute.

None
tensor([1.8299, 4.6817, 2.3913])
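To connect this with the vector-Jacobian product mentioned above: when the output is not a scalar, backward() needs a vector v and computes J^T v. A small sketch (the tensor names are illustrative, not from the original notebook):

[ ]: x = torch.randn(3, requires_grad=True)
y = x * 2            # non-scalar output: y has 3 elements
v = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float32)
y.backward(v)        # computes J^T v, here simply 2 * v
print(x.grad)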
Stop a tensor from tracking history: For example during the training loop when we want to
update our weights, or after training during evaluation. These operations should not be part of the
gradient computation. To prevent this, we can use:
• x.requires_grad_(False)
• x.detach()
• wrap in with torch.no_grad():
[ ]: # .requires_grad_(...) changes the flag in-place
a = torch.randn(2, 2)
b = (a * a).sum()
print(a.requires_grad)
print(b.grad_fn)

a.requires_grad_(True)
b = (a * a).sum()
print(a.requires_grad)
print(b.grad_fn)

False
None
True
<SumBackward0 object at 0x793f3830efb0>
[12]: # .detach(): get a new Tensor with the same content but no gradient computation:
a = torch.randn(2, 2, requires_grad=True)
b = a.detach()
print(a.requires_grad)
print(b.requires_grad)
True
False
[ ]: # wrap in 'with torch.no_grad():'
a = torch.randn(2, 2, requires_grad=True)
print(a.requires_grad)
with torch.no_grad():
    b = a ** 2
    print(b.requires_grad)

True
False
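As the warning above says, backward() accumulates gradients into .grad, so they must be reset between optimization steps. A small sketch of the effect (the weights tensor is illustrative; the training loops below do the same thing with w.grad.zero_() / optimizer.zero_grad()):

[ ]: weights = torch.ones(4, requires_grad=True)
for epoch in range(3):
    model_output = (weights * 3).sum()
    model_output.backward()
    print(weights.grad)    # stays tensor([3., 3., 3., 3.]) because we reset it below;
                           # without the reset it would grow to 6, then 9
    weights.grad.zero_()   # reset the accumulated gradient before the next backward()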
[ ]: # Linear regression
# f = w * x + b
# here : f = 2 * x

# training data for f = 2 * x
X = torch.tensor([1, 2, 3, 4, 5, 6, 7, 8], dtype=torch.float32)
Y = torch.tensor([2, 4, 6, 8, 10, 12, 14, 16], dtype=torch.float32)

# the trainable weight, initialized to zero
w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

# model output
def forward(x):
    return w * x

# loss = MSE
def loss(y, y_pred):
    return ((y_pred - y)**2).mean()

X_test = 5.0
[ ]: # Training
learning_rate = 0.01
n_epochs = 100

for epoch in range(n_epochs):
    # predict = forward pass
    y_pred = forward(X)

    # loss
    l = loss(Y, y_pred)

    # calculate gradients = backward pass
    l.backward()

    # update weights
    # w.data = w.data - learning_rate * w.grad
    with torch.no_grad():
        w -= learning_rate * w.grad

    # zero the gradients after updating (backward() accumulates!)
    w.grad.zero_()

    if (epoch+1) % 10 == 0:
        print(f'epoch {epoch+1}: w = {w.item():.3f}, loss = {l.item():.3f}')
# Linear regression
# f = w * x
# here : f = 2 * x
n_samples = 8, n_features = 1
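The cell that builds the training data for this version and prints the line above is missing from the export. A minimal setup consistent with it (8 samples of a single feature, again following f = 2 * x) could be:

[ ]: # samples must have shape [n_samples, n_features] for nn.Linear
X = torch.tensor([[1], [2], [3], [4], [5], [6], [7], [8]], dtype=torch.float32)
Y = torch.tensor([[2], [4], [6], [8], [10], [12], [14], [16]], dtype=torch.float32)

n_samples, n_features = X.shape
print(f'n_samples = {n_samples}, n_features = {n_features}')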
[ ]: import torch.nn as nn

# 1) Design model: the model has to implement the forward pass!
class LinearRegression(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LinearRegression, self).__init__()
        # define different layers
        self.lin = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        return self.lin(x)

model = LinearRegression(n_features, n_features)

# 2) Define loss and optimizer
loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
# 3) Training loop
for epoch in range(n_epochs):
    # predict = forward pass with our model
    y_predicted = model(X)

    # loss
    l = loss(Y, y_predicted)

    # calculate gradients = backward pass
    l.backward()

    # update weights
    optimizer.step()

    # zero the gradients after updating
    optimizer.zero_grad()

    if (epoch+1) % 10 == 0:
        w, b = model.parameters()  # unpack parameters
        print('epoch ', epoch+1, ': w = ', w[0][0].item(), ' loss = ', l.item())
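The next example trains a fully connected network on MNIST. Its section heading and import cell did not survive the export; assuming the same imports as the CNN section further below:

[ ]: import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt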
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Hyper-parameters
input_size = 784 # 28x28
hidden_size = 500
num_classes = 10
num_epochs = 2
batch_size = 100
learning_rate = 0.001
# MNIST dataset
train_dataset = torchvision.datasets.MNIST(root='./data',
                                           train=True,
                                           transform=transforms.ToTensor(),
                                           download=True)

test_dataset = torchvision.datasets.MNIST(root='./data',
                                          train=False,
                                          transform=transforms.ToTensor())

# Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)
examples = iter(test_loader)
example_data, example_targets = next(examples)  # the .next() method was removed in Python 3

for i in range(6):
    plt.subplot(2, 3, i+1)
    plt.imshow(example_data[i][0], cmap='gray')
plt.show()
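The definition of the network itself is missing from the export. A minimal fully connected model consistent with the hyper-parameters above (784 inputs, one hidden layer of 500 units, 10 classes) might look like this; the class name NeuralNet is an assumption:

[ ]: class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.l1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # no softmax at the end: nn.CrossEntropyLoss applies log-softmax itself
        out = self.l1(x)
        out = self.relu(out)
        out = self.l2(out)
        return out

model = NeuralNet(input_size, hidden_size, num_classes).to(device)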
[ ]: criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
n_total_steps = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # flatten the images to [batch_size, 784] and move the batch to the device
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)

        # forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # backward pass and update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')
0.6 5. CNN
This section covers:
• Convolutional Layers
• MaxPooling
• Save/Load model
[ ]: import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Hyper-parameters
num_epochs = 10
batch_size = 32
learning_rate = 0.001
# dataset has PILImage images in range [0, 1];
# we transform them to Tensors normalized to [-1, 1] (imshow below un-does this)
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# CIFAR10: 60000 32x32 color images in 10 classes, with 6000 images per class
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                             download=True, transform=transform)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                           shuffle=True)
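The test split is used by the evaluation cell at the end of this section, but its definition is missing here; following the same pattern as the training data:

[ ]: test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                            download=True, transform=transform)

test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size,
                                          shuffle=False)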
def imshow(imgs):
    imgs = imgs / 2 + 0.5   # unnormalize
    npimgs = imgs.numpy()
    plt.imshow(np.transpose(npimgs, (1, 2, 0)))
    plt.show()
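A typical way to call this helper (not in the exported text) is on a grid of a few training images:

[ ]: # grab one batch and show the first 4 images as a single grid
dataiter = iter(train_loader)
images, labels = next(dataiter)
imshow(torchvision.utils.make_grid(images[:4]))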
[ ]: class ConvNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, 3)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 3)
        self.conv3 = nn.Conv2d(64, 64, 3)
        self.fc1 = nn.Linear(64*4*4, 64)
        self.fc2 = nn.Linear(64, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))   # [b, 3, 32, 32] -> [b, 32, 15, 15]
        x = self.pool(F.relu(self.conv2(x)))   # -> [b, 64, 6, 6]
        x = F.relu(self.conv3(x))              # -> [b, 64, 4, 4]
        x = torch.flatten(x, 1)                # -> [b, 64*4*4]
        x = F.relu(self.fc1(x))
        x = self.fc2(x)                        # logits; CrossEntropyLoss adds the softmax
        return x
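Why fc1 expects 64*4*4 inputs: each 3x3 convolution without padding shrinks the spatial size by 2, and each MaxPool2d(2, 2) halves it (rounding down), so 32 -> 30 -> 15 -> 13 -> 6 -> 4. A quick sanity check (illustrative):

[ ]: m = ConvNet()
x = torch.randn(1, 3, 32, 32)   # one fake CIFAR-10 image
print(m(x).shape)               # expected: torch.Size([1, 10])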
model = ConvNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
n_total_steps = len(train_loader)

for epoch in range(num_epochs):
    running_loss = 0.0
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f'[{epoch + 1}] loss: {running_loss / n_total_steps:.3f}')
print('Finished Training')
PATH = './cnn.pth'
torch.save(model.state_dict(), PATH)
[ ]: loaded_model = ConvNet()
# load_state_dict takes the loaded dictionary, not the path itself
loaded_model.load_state_dict(torch.load(PATH))
loaded_model.to(device)
loaded_model.eval()
with torch.no_grad():
    n_correct = 0
    n_correct2 = 0
    n_samples = len(test_loader.dataset)

    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        # original model
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        n_correct += (predicted == labels).sum().item()

        # loaded model
        outputs2 = loaded_model(images)
        _, predicted2 = torch.max(outputs2, 1)
        n_correct2 += (predicted2 == labels).sum().item()

    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of the model: {acc} %')

    acc = 100.0 * n_correct2 / n_samples
    print(f'Accuracy of the loaded model: {acc} %')