PyTorch 通过示例学习 PyTorch

本教程将详细讲解 PyTorch 的基础知识。通过本教程，你将快速掌握 PyTorch 的核心概念和操作。

一、PyTorch 简介

PyTorch 是一个开源的机器学习库，广泛应用于计算机视觉、自然语言处理等领域。它具有动态计算图、易于使用的 API 和强大的社区支持等特点。

二、张量操作

张量是 PyTorch 中的基本数据结构，类似于 NumPy 中的数组。

import torch


## 创建张量
x = torch.tensor([1, 2, 3])
y = torch.tensor([4, 5, 6])


## 张量运算
z = x + y
print(z)

三、自动求导

PyTorch 提供了自动求导功能，可以自动计算梯度。

x = torch.tensor(2.0, requires_grad=True)
y = x ** 2
y.backward()
print(x.grad)

四、模型构建

使用 PyTorch 构建神经网络模型非常简单。

import torch.nn as nn


class SimpleModel(nn.Module):
    def __init__(self):
        super(SimpleModel, self).__init__()
        self.linear = nn.Linear(10, 5)


    def forward(self, x):
        return self.linear(x)


model = SimpleModel()
print(model)

五、模型训练

训练模型是深度学习中的关键步骤。

import torch.optim as optim


## 定义损失函数和优化器
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)


## 训练模型
for epoch in range(10):
    optimizer.zero_grad()
    output = model(torch.randn(1, 10))
    loss = criterion(output, torch.randn(1, 5))
    loss.backward()
    optimizer.step()
    print(f'Epoch {epoch + 1}, Loss: {loss.item()}')

六、模型保存与加载

训练完成后，可以保存和加载模型。

## 保存模型
torch.save(model.state_dict(), 'model.pth')


## 加载模型
model.load_state_dict(torch.load('model.pth'))

七、通过示例学习 PyTorch

示例 1：使用 NumPy 实现网络

在介绍 PyTorch 之前，我们先使用 NumPy 实现一个简单的网络。

import numpy as np


## N 是批量大小，D_in 是输入维度，H 是隐藏层维度，D_out 是输出维度
N, D_in, H, D_out = 64, 1000, 100, 10


## 创建随机输入和输出数据
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)


## 随机初始化权重
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)


learning_rate = 1e-6
for t in range(500):
    # 前向传播：计算预测的 y
    h = x.dot(w1)
    h_relu = np.maximum(h, 0)
    y_pred = h_relu.dot(w2)


    # 计算损失
    loss = np.square(y_pred - y).sum()
    print(t, loss)


    # 反向传播：计算 w1 和 w2 的梯度
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.T.dot(grad_y_pred)
    grad_h_relu = grad_y_pred.dot(w2.T)
    grad_h = grad_h_relu.copy()
    grad_h[h < 0] = 0
    grad_w1 = x.T.dot(grad_h)


    # 更新权重
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

示例 2：使用 PyTorch 张量

使用 PyTorch 张量实现相同的网络，利用 GPU 加速计算。

dtype = torch.float
device = torch.device("cpu")


## N 是批量大小，D_in 是输入维度，H 是隐藏层维度，D_out 是输出维度
N, D_in, H, D_out = 64, 1000, 100, 10


## 创建随机输入和输出数据
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = torch.randn(N, D_out, device=device, dtype=dtype)


## 随机初始化权重
w1 = torch.randn(D_in, H, device=device, dtype=dtype)
w2 = torch.randn(H, D_out, device=device, dtype=dtype)


learning_rate = 1e-6
for t in range(500):
    # 前向传播：计算预测的 y
    h = x.mm(w1)
    h_relu = h.clamp(min=0)
    y_pred = h_relu.mm(w2)


    # 计算损失
    loss = (y_pred - y).pow(2).sum().item()
    if t % 100 == 99:
        print(t, loss)


    # 反向传播：计算 w1 和 w2 的梯度
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.t().mm(grad_y_pred)
    grad_h_relu = grad_y_pred.mm(w2.t())
    grad_h = grad_h_relu.clone()
    grad_h[h < 0] = 0
    grad_w1 = x.t().mm(grad_h)


    # 更新权重
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

示例 3：使用自动求导

利用 PyTorch 的自动求导功能，简化反向传播过程。

dtype = torch.float
device = torch.device("cpu")


## N 是批量大小，D_in 是输入维度，H 是隐藏层维度，D_out 是输出维度
N, D_in, H, D_out = 64, 1000, 100, 10


## 创建随机输入和输出数据
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = torch.randn(N, D_out, device=device, dtype=dtype)


## 随机初始化权重，设置 requires_grad=True 表示需要计算梯度
w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)
w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)


learning_rate = 1e-6
for t in range(500):
    # 前向传播：计算预测的 y
    y_pred = x.mm(w1).clamp(min=0).mm(w2)


    # 计算损失
    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 99:
        print(t, loss.item())


    # 反向传播：自动计算梯度
    loss.backward()


    # 使用梯度下降更新权重
    with torch.no_grad():
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad


        # 清除梯度
        w1.grad.zero_()
        w2.grad.zero_()

示例 4：定义新的自动求导函数

自定义自动求导函数，实现 ReLU 激活函数。

class MyReLU(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input):
        ctx.save_for_backward(input)
        return input.clamp(min=0)


    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        grad_input = grad_output.clone()
        grad_input[input < 0] = 0
        return grad_input


dtype = torch.float
device = torch.device("cpu")


## N 是批量大小，D_in 是输入维度，H 是隐藏层维度，D_out 是输出维度
N, D_in, H, D_out = 64, 1000, 100, 10


## 创建随机输入和输出数据
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = torch.randn(N, D_out, device=device, dtype=dtype)


## 随机初始化权重，设置 requires_grad=True 表示需要计算梯度
w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)
w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)


learning_rate = 1e-6
for t in range(500):
    # 使用自定义的 ReLU 函数
    relu = MyReLU.apply
    y_pred = relu(x.mm(w1)).mm(w2)


    # 计算损失
    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 99:
        print(t, loss.item())


    # 反向传播：自动计算梯度
    loss.backward()


    # 使用梯度下降更新权重
    with torch.no_grad():
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad


        # 清除梯度
        w1.grad.zero_()
        w2.grad.zero_()

示例 5：使用 `nn` 模块

使用 PyTorch 的 nn 模块构建神经网络。

## N 是批量大小，D_in 是输入维度，H 是隐藏层维度，D_out 是输出维度
N, D_in, H, D_out = 64, 1000, 100, 10


## 创建随机输入和输出数据
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)


## 使用 nn.Sequential 定义模型
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)


## 定义损失函数
loss_fn = torch.nn.MSELoss(reduction='sum')


learning_rate = 1e-4
for t in range(500):
    # 前向传播
    y_pred = model(x)


    # 计算损失
    loss = loss_fn(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())


    # 反向传播
    model.zero_grad()
    loss.backward()


    # 更新权重
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad

示例 6：使用优化器

使用 PyTorch 的优化器简化权重更新过程。

## N 是批量大小，D_in 是输入维度，H 是隐藏层维度，D_out 是输出维度
N, D_in, H, D_out = 64, 1000, 100, 10


## 创建随机输入和输出数据
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)


## 使用 nn.Sequential 定义模型
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)


## 定义损失函数和优化器
loss_fn = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)


for t in range(500):
    # 前向传播
    y_pred = model(x)


    # 计算损失
    loss = loss_fn(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())


    # 反向传播
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

示例 7：自定义 `nn` 模块

定义自己的神经网络模块。

class TwoLayerNet(torch.nn.Module):
    def __init__(self, D_in, H, D_out):
        super(TwoLayerNet, self).__init__()
        self.linear1 = torch.nn.Linear(D_in, H)
        self.linear2 = torch.nn.Linear(H, D_out)


    def forward(self, x):
        h_relu = self.linear1(x).clamp(min=0)
        y_pred = self.linear2(h_relu)
        return y_pred


## N 是批量大小，D_in 是输入维度，H 是隐藏层维度，D_out 是输出维度
N, D_in, H, D_out = 64, 1000, 100, 10


## 创建随机输入和输出数据
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)


## 创建模型
model = TwoLayerNet(D_in, H, D_out)


## 定义损失函数和优化器
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)


for t in range(500):
    # 前向传播
    y_pred = model(x)


    # 计算损失
    loss = criterion(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())


    # 反向传播
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

示例 8：控制流与权重共享

实现一个具有动态隐藏层数量的神经网络。

class DynamicNet(torch.nn.Module):
    def __init__(self, D_in, H, D_out):
        super(DynamicNet, self).__init__()
        self.input_linear = torch.nn.Linear(D_in, H)
        self.middle_linear = torch.nn.Linear(H, H)
        self.output_linear = torch.nn.Linear(H, D_out)


    def forward(self, x):
        h_relu = self.input_linear(x).clamp(min=0)
        for _ in range(random.randint(0, 3)):
            h_relu = self.middle_linear(h_relu).clamp(min=0)
        y_pred = self.output_linear(h_relu)
        return y_pred


## N 是批量大小，D_in 是输入维度，H 是隐藏层维度，D_out 是输出维度
N, D_in, H, D_out = 64, 1000, 100, 10


## 创建随机输入和输出数据
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)


## 创建模型
model = DynamicNet(D_in, H, D_out)


## 定义损失函数和优化器
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, momentum=0.9)


for t in range(500):
    # 前向传播
    y_pred = model(x)


    # 计算损失
    loss = criterion(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())


    # 反向传播
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

通过以上示例，你可以快速掌握 PyTorch 的基本操作和模型构建方法。在编程狮（W3Cschool）平台上，你可以找到更多关于 PyTorch 的详细教程和示例代码，帮助你深入理解和应用 PyTorch 进行深度学习开发。