Preparing data from scratch is usually the most tedious part, so it is easier to start from a public dataset. Two well-known options for cat/dog classification are Kaggle's "Dogs vs. Cats" dataset and the Oxford-IIIT Pet dataset from the University of Oxford; both contain labeled cat and dog images and are well suited to this task.
Here I use the Kaggle "Dogs vs. Cats" dataset, which you can find by searching directly on Kaggle.
The raw dataset from Kaggle is organized like this:
data/
├── Cat/
│ ├── 1.jpg
│ ├── 2.jpg
│ ├── ...
├── Dog/
│ ├── 1.jpg
│ ├── 2.jpg
│ ├── ...
After downloading the dataset, upload it to the board and unzip it. Once it is extracted, just move the Dog/ and Cat/ directories into place, as in the sketch below.
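A minimal sketch of that unzip-and-move step, assuming the archive is named kagglecatsanddogs.zip and the class folders sit under a PetImages/ directory inside it (both names are assumptions; adjust them to whatever you actually downloaded):
import os
import shutil
import zipfile

# NOTE: archive name and internal layout are assumptions -- change them to match your download.
archive_path = "kagglecatsanddogs.zip"   # hypothetical file name
extract_dir = "raw"                       # temporary extraction directory

with zipfile.ZipFile(archive_path) as zf:
    zf.extractall(extract_dir)

# Move the Cat/ and Dog/ folders into a single data/ directory
os.makedirs("data", exist_ok=True)
for class_name in ("Cat", "Dog"):
    src = os.path.join(extract_dir, "PetImages", class_name)  # assumed layout inside the archive
    dst = os.path.join("data", class_name)
    if os.path.isdir(src) and not os.path.isdir(dst):
        shutil.move(src, dst)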
PyTorch's ImageFolder, however, expects the data to be stored under train/ and val/ directories, so we need to rearrange it into the following layout:
data_split/
├── train/
│ ├── cats/
│ │ ├── 1.jpg
│ │ ├── 2.jpg
│ ├── dogs/
│ │ ├── 1.jpg
│ │ ├── 2.jpg
├── val/
│ ├── cats/
│ │ ├── 1001.jpg
│ │ ├── 1002.jpg
│ ├── dogs/
│ │ ├── 1001.jpg
│ │ ├── 1002.jpg
Run the following script; it automatically creates the train/ and val/ directories and splits the data 80%/20%:
import os
import shutil
import random

# Paths
original_data_dir = "data"    # directory containing the original Cat/ and Dog/ folders
base_dir = "data_split"       # destination for the train/val split
train_dir = os.path.join(base_dir, "train")
val_dir = os.path.join(base_dir, "val")

# Create the train/ and val/ directories for both classes
for split_dir in (train_dir, val_dir):
    for class_name in ("cats", "dogs"):
        os.makedirs(os.path.join(split_dir, class_name), exist_ok=True)

# Collect all cat and dog images
all_cats = [f for f in os.listdir(os.path.join(original_data_dir, "Cat")) if f.endswith(".jpg")]
all_dogs = [f for f in os.listdir(os.path.join(original_data_dir, "Dog")) if f.endswith(".jpg")]

# Shuffle with a fixed seed so the split is reproducible
random.seed(42)
random.shuffle(all_cats)
random.shuffle(all_dogs)

# 80% training, 20% validation (computed per class)
cat_train_size = int(0.8 * len(all_cats))
dog_train_size = int(0.8 * len(all_dogs))
train_cats, val_cats = all_cats[:cat_train_size], all_cats[cat_train_size:]
train_dogs, val_dogs = all_dogs[:dog_train_size], all_dogs[dog_train_size:]

# Copy cat images into the new layout
for fname in train_cats:
    shutil.copy(os.path.join(original_data_dir, "Cat", fname), os.path.join(train_dir, "cats", fname))
for fname in val_cats:
    shutil.copy(os.path.join(original_data_dir, "Cat", fname), os.path.join(val_dir, "cats", fname))

# Copy dog images into the new layout
for fname in train_dogs:
    shutil.copy(os.path.join(original_data_dir, "Dog", fname), os.path.join(train_dir, "dogs", fname))
for fname in val_dogs:
    shutil.copy(os.path.join(original_data_dir, "Dog", fname), os.path.join(val_dir, "dogs", fname))

print("Dataset split complete!")
📌 After the script finishes, data_split/ is organized exactly the way PyTorch's ImageFolder expects.
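As an optional sanity check (just a small sketch), you can count how many images ended up in each split:
import os

# Count the images copied into each split/class directory
for split in ("train", "val"):
    for class_name in ("cats", "dogs"):
        path = os.path.join("data_split", split, class_name)
        print(f"{split}/{class_name}: {len(os.listdir(path))} images")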
With the split in place, we can train VGG16.
import torch
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader

# Data preprocessing
train_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Load the data
train_dataset = datasets.ImageFolder(root="data_split/train", transform=train_transform)
val_dataset = datasets.ImageFolder(root="data_split/val", transform=val_transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)
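Before training, it is worth confirming the label mapping and batch shapes (a small optional check):
# ImageFolder assigns labels alphabetically, so cats -> 0 and dogs -> 1
print(train_dataset.class_to_idx)   # expected: {'cats': 0, 'dogs': 1}

images, labels = next(iter(train_loader))
print(images.shape, labels.shape)   # expected: torch.Size([32, 3, 224, 224]) torch.Size([32])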
Following the original paper and a few references found online, I wrote a simple VGG16, loaded the official pretrained weights, and then moved the model to the GPU, since CPU training would be painfully slow. (Training on the NPU also seemed very slow for me; it kept hanging and running into problems /(ㄒoㄒ)/~~)
import torch
import torch.nn as nn
import torchvision.models as models

# Custom VGG16
class VGG16(nn.Module):
    def __init__(self, num_classes=2):
        super(VGG16, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1), nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, padding=1), nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(64, 128, kernel_size=3, padding=1), nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, padding=1), nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(128, 256, kernel_size=3, padding=1), nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, padding=1), nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, padding=1), nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(256, 512, kernel_size=3, padding=1), nn.ReLU(),
            nn.Conv2d(512, 512, kernel_size=3, padding=1), nn.ReLU(),
            nn.Conv2d(512, 512, kernel_size=3, padding=1), nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(512, 512, kernel_size=3, padding=1), nn.ReLU(),
            nn.Conv2d(512, 512, kernel_size=3, padding=1), nn.ReLU(),
            nn.Conv2d(512, 512, kernel_size=3, padding=1), nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096), nn.ReLU(), nn.Dropout(),
            nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(),
            nn.Linear(4096, num_classes)  # 2 classes (cat/dog)
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)  # Flatten
        x = self.classifier(x)
        return x

# Create the model instance
model = VGG16(num_classes=2)

# Manually load the pretrained weights
# (newer torchvision deprecates pretrained=True in favor of weights=models.VGG16_Weights.DEFAULT)
pretrained_vgg = models.vgg16(pretrained=True)   # torchvision's pretrained VGG16
pretrained_dict = pretrained_vgg.state_dict()    # pretrained weights

# Parameters of our own model
model_dict = model.state_dict()

# Keep only the features (convolutional) weights; drop the classifier (fully connected) ones
pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict and "classifier" not in k}
model_dict.update(pretrained_dict)   # merge into our model's parameters
model.load_state_dict(model_dict)    # load the weights

print("✅ Pretrained weights loaded!")

# Move the model to the GPU (if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
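A quick optional sanity check that the pretrained backbone and the new classifier fit together, assuming 224×224 inputs:
# Run a dummy batch through the model to confirm the output shape
with torch.no_grad():
    dummy = torch.randn(1, 3, 224, 224, device=device)
    print(model(dummy).shape)   # expected: torch.Size([1, 2])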
That said, if you already know VGG16 well, you can simply use the VGG16 that ships with torchvision; the official models come with well-tuned weights, so training is faster and tends to give better results.
import torchvision.models as models
import torch.nn as nn

# Load the pretrained VGG16 model
model = models.vgg16(pretrained=True)

# Freeze the feature-extraction (convolutional) layers
for param in model.features.parameters():
    param.requires_grad = False

# Replace the last fully connected layer
num_features = model.classifier[6].in_features
model.classifier[6] = nn.Linear(num_features, 2)  # 2 classes (cat/dog)

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
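To confirm that only the classifier will be updated after freezing the backbone, you can count the trainable parameters (optional sketch):
# Only the classifier parameters should still require gradients
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
total = sum(p.numel() for p in model.parameters())
print(f"Trainable parameters: {trainable:,} / {total:,}")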
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.classifier.parameters(), lr=0.001, momentum=0.9)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (i + 1) % 50 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {running_loss / 50:.4f}")
            running_loss = 0.0

print("Training complete!")
During training you may hit read errors: the Kaggle dataset contains a handful of corrupt or truncated images. Either delete the offending files or simply skip them; a small pre-scan like the one below also works.
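A minimal sketch of such a pre-scan (my own addition, not part of the original workflow) that uses Pillow to find and remove unreadable files before training:
import os
from PIL import Image

# Scan every image under data_split/ and delete the ones Pillow cannot parse
removed = 0
for root, _, files in os.walk("data_split"):
    for fname in files:
        path = os.path.join(root, fname)
        try:
            with Image.open(path) as img:
                img.verify()          # raises an exception for corrupt files
        except Exception:
            os.remove(path)
            removed += 1
print(f"Removed {removed} unreadable images")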
With the free GPU compute on Cloud Studio, training is quite fast: the whole Kaggle cats-and-dogs dataset finished in about half an hour, far quicker than on the CPU.
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in val_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"Validation accuracy: {accuracy:.2f}%")
On the held-out validation split, the accuracy reaches 98.89%.
Let's save the model:
# Save the entire model (architecture + weights)
torch.save(model, "vgg16_cat_dog.pth")
print("✅ Model saved to vgg16_cat_dog.pth")
Here are the predictions for the first five validation images; all five are correct.
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
import numpy as np

# Preprocessing (same as the validation transform used during training)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Predict the first 5 images of the validation set
num_images = 5
fig, axes = plt.subplots(1, num_images, figsize=(15, 5))

model.eval()
with torch.no_grad():
    for i in range(num_images):
        image, label = val_dataset[i]                 # already-transformed image
        image_tensor = image.unsqueeze(0).to(device)  # add a batch dimension and move to the GPU
        output = model(image_tensor)
        _, predicted = torch.max(output, 1)           # predicted class index

        # Un-normalize and convert to a numpy array for display
        image = image.permute(1, 2, 0).numpy()
        image = (image * [0.229, 0.224, 0.225]) + [0.485, 0.456, 0.406]
        image = np.clip(image, 0, 1)                  # clamp to [0, 1]

        # Show the image together with the prediction
        axes[i].imshow(image)
        axes[i].set_title(f"Pred: {'Dog' if predicted.item() == 1 else 'Cat'}")
        axes[i].axis("off")

plt.show()
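If you are running this in a headless environment (for example over SSH on the board), plt.show() will not open a window; saving the figure to disk is a simple workaround:
# Save the figure instead of (or in addition to) showing it interactively
fig.savefig("predictions.png", dpi=150, bbox_inches="tight")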
✅ Data split: the original cat/dog dataset is divided into 80% training and 20% validation.
✅ Data loading: torchvision.datasets.ImageFolder reads the data, with normalization applied.
✅ VGG transfer learning: load the pretrained VGG16, freeze the convolutional layers, and fine-tune only the fully connected layers.
✅ Training: cross-entropy loss + SGD optimizer, reaching 90%+ accuracy on the validation set!
Original content statement: this article was published on the Tencent Cloud Developer Community with the author's authorization and may not be reproduced without permission.
If you believe it infringes your rights, please contact cloudcommunity@tencent.com to request removal.