本次分享一个简单的使用PyTorch进行图像分类模型搭建的小案例,让大家对PyTorch的流程有一个认知。
import torch
import torch.nn as nn
import torchvision
import numpy as np
from torch.autograd import Variable
import matplotlib.pyplot as plt
import torch.nn.functional as F
import torch.utils.data as Data
from torchvision import datasets, models, transforms
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"设备已设置为:{device}")

# Preprocessing pipeline: convert each image to a tensor, then normalize every
# RGB channel with mean 0.5 and std 0.5, i.e. (x - 0.5) / 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)
print("数据预处理已完成:将图像转换为Tensor并进行标准化。")
# Load the CIFAR-10 dataset.
# download=True on both splits: the training split is constructed first, so
# with download=False a fresh run fails before the archive is ever fetched.
trainset = torchvision.datasets.CIFAR10(root='./', train=True, download=True, transform=transform)
# Shuffle the training data so every epoch visits the samples in a new order.
# NOTE: because of the shuffling, a batch previewed from this loader differs
# between runs.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4, shuffle=True, num_workers=4)
testset = torchvision.datasets.CIFAR10(root='./', train=False, download=True, transform=transform)
# Evaluation does not benefit from shuffling; a fixed order keeps it reproducible.
testloader = torch.utils.data.DataLoader(testset, batch_size=4, shuffle=False, num_workers=4)

# Class names, indexed by the integer label stored in the dataset.
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# Report dataset sizes.
print("CIFAR-10 数据集加载完成。")
print(f"训练集样本数: {len(trainset)}")
print(f"测试集样本数: {len(testset)}")
# 显示几个样例图像
def imshow(img):
    """Display a normalized image tensor with matplotlib.

    Args:
        img: Image tensor laid out as (channels, height, width) whose values
            were normalized with mean 0.5 and std 0.5.
    """
    img = img / 2 + 0.5  # undo the normalization: x * std + mean
    # detach() + cpu() so tensors that live on the GPU or track gradients can
    # be converted as well; both calls are no-ops for plain CPU tensors.
    npimg = img.detach().cpu().numpy()
    # matplotlib expects (height, width, channels).
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()
# Fetch one batch from the training data.
dataiter = iter(trainloader)
images, labels = next(dataiter)
# Print the ground-truth class names. Iterate over the labels actually present
# in the batch instead of assuming a batch size of 4.
print('真实标签: ', ' '.join(f'{classes[idx]:5s}' for idx in labels.tolist()))
# Show the images of the batch as a single grid.
imshow(torchvision.utils.make_grid(images))
输出:
Files already downloaded and verified
CIFAR-10 数据集加载完成。
训练集样本数: 50000
测试集样本数: 10000
真实标签: frog truck truck deer
可以看到图像有青蛙,卡车和鹿。
class Net(nn.Module):
    """Small convolutional classifier for 3-channel 32x32 images.

    Two 5x5 convolutions, each followed by ReLU and 2x2 max pooling, reduce a
    3x32x32 input to 16x5x5 features; three fully connected layers then map
    those 400 features to 10 class scores.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return unnormalized class scores of shape (batch, 10).

        Args:
            x: Batch of images shaped (batch, 3, 32, 32).

        Raises:
            RuntimeError: If the spatial size of ``x`` does not yield
                16 * 5 * 5 features per sample.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike
        # view(-1, 16 * 5 * 5), this can never silently fold a wrong spatial
        # size into the batch dimension; a mismatch surfaces as a shape error
        # in fc1 instead.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)
print("神经网络结构已定义:卷积层和全连接层。")
# Move the model to the selected device. The training and evaluation code
# moves every batch to `device`, so leaving the parameters on the CPU makes the
# forward pass fail as soon as CUDA is available.
net = Net().to(device)
criterion = nn.CrossEntropyLoss()
# Build the optimizer after the move so it holds the on-device parameters.
optimizer = torch.optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
print(net)
print("损失函数和优化器已定义:交叉熵损失和SGD优化器。")
输出:
Net(
(conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
(pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
(fc1): Linear(in_features=400, out_features=120, bias=True)
(fc2): Linear(in_features=120, out_features=84, bias=True)
(fc3): Linear(in_features=84, out_features=10, bias=True)
)
损失函数和优化器已定义:交叉熵损失和SGD优化器。
nums_epoch = 2
print(f"开始训练,共训练 {nums_epoch} 轮。")
for epoch in range(nums_epoch):
    # Loss accumulated since the last report; restarted at every epoch.
    running_loss = 0.0
    # Count batches from 1 so the step number can be printed directly.
    for step, batch in enumerate(trainloader, start=1):
        inputs = batch[0].to(device)
        labels = batch[1].to(device)

        # Standard optimization step: reset gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # Report the mean loss once every 3000 batches.
        if step % 3000 == 0:
            print(f"[{epoch + 1}, {step}] 损失: {(running_loss / 3000):.3f}")
            running_loss = 0.0
print("训练完成。")
输出:
开始训练,共训练 2 轮。
[1, 3000] 损失: 2.138
[1, 6000] 损失: 1.740
[1, 9000] 损失: 1.582
[1, 12000] 损失: 1.501
[2, 3000] 损失: 1.428
[2, 6000] 损失: 1.381
[2, 9000] 损失: 1.333
[2, 12000] 损失: 1.301
训练完成。
def imshow(img):
    """Display a normalized image tensor with matplotlib.

    Args:
        img: Image tensor laid out as (channels, height, width) whose values
            were normalized with mean 0.5 and std 0.5.
    """
    img = img / 2 + 0.5  # undo the normalization: x * std + mean
    # detach() + cpu() so tensors that live on the GPU or track gradients can
    # be converted as well; both calls are no-ops for plain CPU tensors.
    npimg = img.detach().cpu().numpy()
    # Reorder the axes to (height, width, channels) as matplotlib expects.
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()
# Fetch one batch of test images and labels.
dataiter = iter(testloader)
images, labels = next(dataiter)
print("获取一个batch的测试图像和标签。图像形状:", images.shape)
imshow(torchvision.utils.make_grid(images))  # show the batch as one grid
# Iterate over the labels actually present instead of assuming 4 per batch.
print('图像真实分类: ', ' '.join(f'{classes[idx]:5s}' for idx in labels.tolist()))
# Inference only: skip gradient tracking, as the evaluation loop does.
with torch.no_grad():
    outputs = net(images.to(device))
# The index of the highest score along dim 1 is the predicted class.
_, predicted = torch.max(outputs, 1)
print('图像预测分类: ', ' '.join(f'{classes[idx]:5s}' for idx in predicted.tolist()))
输出:
获取一个batch的测试图像和标签。图像形状: torch.Size([4, 3, 32, 32])
图像真实分类: cat dog cat bird
图像预测分类: dog dog dog dog
# Count correct predictions over the whole test set.
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)
        # Predicted class = index of the largest score along dim 1.
        predicted = net(images).argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
accuracy = 100 * correct / total
print(f"测试集准确率: {accuracy:.2f}%")
评估时使用 torch.no_grad() 关闭梯度计算,可以减少内存占用并提高推理效率。
本次分享就结束了