公众号后台回复关键词:gpu,获取B站视频演示教程。
在国内使用邮箱注册kaggle时会遇到一个人机验证的步骤,可以通过翻墙访问外网的方式完成,但比较麻烦。
推荐使用FireFox浏览器,下载Header Editor进行解决,无需翻墙相对简单。
1,下载安装FireFox浏览器
2,添加Header Editor浏览器插件【找到FireFox浏览器的 工具 -> 扩展和主题 -> 搜索 Header Editor -> 添加到FireFox】
3,配置Header Editor插件【找到FireFox右上角Header Editor -> 导出和导入 -> 下载规则中输入如下规则url -> 点击向下箭头加载 】
规则url: https://azurezeng.github.io/static/HE-GoogleRedirect.json
4,在kaggle官网用邮箱正常注册kaggle即可。
kaggle官网:https://www.kaggle.com/
5,此后就可以在任何能联网的地方正常登录kaggle,不再需要Header Editor了
1,新建notebook. 【点击kaggle主页面左上角+, 选择notebook】
2,开启GPU开关。【点击展开notebook右上角 |< 设置,设置Accelerator为GPU 】
3,查看GPU信息。【NoteBook中使用 nvidia-smi查看】
# Notebook shell escape: run nvidia-smi to display the GPU information.
!nvidia-smi
import torch
# Evaluates to True when PyTorch can use a CUDA device.
torch.cuda.is_available()
1,点击展开notebook右上角 |< 设置,找到 Add Data,可以从Kaggle社区发布的数据集中选择一些想要的数据集。
2,也可以点击代表上传的向上箭头,上传数据集文件作为自定义数据集。建议压缩后上传,传输效率较高。
3,此外,也可以通过把数据放在github项目中,用git clone的方式获取。
4,notebook加载进来数据集后,可以在右边数据文件位置点击获取对应路径。
公众号 算法美食屋 回复关键词:pytorch, 获取本范例所用数据集eat_pytorch_datasets百度云盘下载链接。
使用CPU训练:约14s一个Epoch。
import torch
from torch import nn
from torch.utils.data import Dataset,DataLoader
from torchvision import transforms as T
from torchvision import datasets
#======================================================================
# 1. Prepare the data
#======================================================================
# Image pipeline applied to every sample: a single ToTensor step.
transform_img = T.Compose([T.ToTensor()])
def transform_label(x):
    """Wrap a label value in a tensor.

    Used as the ``target_transform`` of the ``ImageFolder`` datasets in this
    script, so every label reaches the loss function as a tensor.

    Args:
        x: The label value to wrap.

    Returns:
        The result of ``torch.tensor(x)``.
    """
    return torch.tensor(x)
# Train and validation splits are read from sibling folders and share the
# same image and label transforms.
ds_train = datasets.ImageFolder(
    "../input/eat-pytorch-datasets/eat_pytorch_datasets/cifar2/train/",
    target_transform=transform_label,
    transform=transform_img,
)
ds_val = datasets.ImageFolder(
    "../input/eat-pytorch-datasets/eat_pytorch_datasets/cifar2/test/",
    target_transform=transform_label,
    transform=transform_img,
)

# Show the mapping from class folder name to integer label.
print(ds_train.class_to_idx)

# Options common to both loaders; only the training loader shuffles.
_loader_opts = dict(batch_size=50, pin_memory=True, num_workers=8)
dl_train = DataLoader(ds_train, shuffle=True, **_loader_opts)
dl_val = DataLoader(ds_val, shuffle=False, **_loader_opts)
%matplotlib inline
%config InlineBackend.figure_format = 'svg'
# Preview the first nine training samples in a 3x3 grid.
from matplotlib import pyplot as plt

plt.figure(figsize=(8,8))
for i in range(9):
    img,label = ds_train[i]
    # Move the first axis (channels) to the end before plotting.
    img = img.permute(1,2,0)
    ax = plt.subplot(3,3,i+1)
    ax.imshow(img.numpy())
    ax.set_title("label = %d"%label.item())
    # Hide the axis ticks; they carry no information for image tiles.
    ax.set_xticks([])
    ax.set_yticks([])
plt.show()
# PyTorch image batches are laid out as (Batch, Channel, Height, Width).
# Print the shapes of the first batch only, then stop iterating.
for features,labels in dl_train:
    print(features.shape,labels.shape)
    break
#======================================================================
# 2. Define the model
#======================================================================
class Net(nn.Module):
    """Small convolutional classifier producing two output scores.

    Forward order: conv(3->32, k=3) -> max-pool(2) -> conv(32->64, k=5) ->
    max-pool(2) -> Dropout2d(p=0.1) -> adaptive max-pool to 1x1 -> flatten ->
    linear(64->32) -> ReLU -> linear(32->2).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3)
        # One pooling layer instance is reused after both convolutions.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5)
        self.dropout = nn.Dropout2d(p=0.1)
        # Collapses each of the 64 feature maps to a single value, so the
        # linear head always receives 64 features.
        self.adaptive_pool = nn.AdaptiveMaxPool2d((1, 1))
        self.flatten = nn.Flatten()
        self.linear1 = nn.Linear(64, 32)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(32, 2)

    def forward(self, x):
        """Return the two raw class scores for each sample in ``x``.

        No softmax is applied here; the training code feeds the output
        directly to ``nn.CrossEntropyLoss``.
        """
        x = self.conv1(x)
        x = self.pool(x)
        x = self.conv2(x)
        x = self.pool(x)
        x = self.dropout(x)
        x = self.adaptive_pool(x)
        x = self.flatten(x)
        x = self.linear1(x)
        x = self.relu(x)
        x = self.linear2(x)
        return x


net = Net()
print(net)
#======================================================================
# 三,训练模型(CPU)
#======================================================================
import os,sys,time
import numpy as np
import pandas as pd
import datetime
from tqdm import tqdm
import torch
from torch import nn
from copy import deepcopy
from torchmetrics import Accuracy
#注:多分类使用torchmetrics中的评估指标,二分类使用torchkeras.metrics中的评估指标
def printlog(info):
    """Print a timestamped separator line followed by ``info``.

    Output is a blank line, then 80 ``=`` characters immediately followed by
    the current local time (``YYYY-mm-dd HH:MM:SS``), then ``str(info)`` and
    a trailing blank line.

    Args:
        info: Any object; it is converted with ``str`` before printing.
    """
    nowtime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print("\n" + "=========="*8 + "%s" % nowtime)
    print(str(info) + "\n")
# Fresh model, loss, optimizer and metrics for this training run.
net = Net()
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.01)
# NOTE(review): newer torchmetrics releases appear to require an explicit
# ``task`` argument for ``Accuracy`` -- confirm against the installed version.
metrics_dict = {"acc": Accuracy()}

epochs = 5
ckpt_path = 'checkpoint.pt'

# Early-stopping settings: watch ``monitor``; ``mode`` says whether larger or
# smaller is better; stop after ``patience`` epochs without a new best.
monitor = "val_acc"
patience = 3
mode = "max"

# Metric name -> list holding one value per completed epoch.
history = {}

for epoch in range(1, epochs+1):
    printlog("Epoch {0} / {1}".format(epoch, epochs))

    # 1, train -------------------------------------------------
    net.train()
    total_loss, step = 0, 0
    loop = tqdm(enumerate(dl_train), total=len(dl_train))
    # Work on a copy so each epoch starts from untouched metric state.
    train_metrics_dict = deepcopy(metrics_dict)
    last_step = len(dl_train) - 1

    for i, batch in loop:
        features, labels = batch

        # forward
        preds = net(features)
        loss = loss_fn(preds, labels)

        # backward
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # metrics
        step_metrics = {"train_"+name: metric_fn(preds, labels).item()
                        for name, metric_fn in train_metrics_dict.items()}
        step_log = dict({"train_loss": loss.item()}, **step_metrics)
        total_loss += loss.item()
        step += 1

        if i != last_step:
            loop.set_postfix(**step_log)
        else:
            # Last batch: show epoch-level aggregates on the progress bar.
            epoch_loss = total_loss/step
            epoch_metrics = {"train_"+name: metric_fn.compute().item()
                             for name, metric_fn in train_metrics_dict.items()}
            epoch_log = dict({"train_loss": epoch_loss}, **epoch_metrics)
            loop.set_postfix(**epoch_log)
            for name, metric_fn in train_metrics_dict.items():
                metric_fn.reset()

    for name, metric in epoch_log.items():
        history.setdefault(name, []).append(metric)

    # 2, validate -------------------------------------------------
    net.eval()
    total_loss, step = 0, 0
    loop = tqdm(enumerate(dl_val), total=len(dl_val))
    val_metrics_dict = deepcopy(metrics_dict)
    last_step = len(dl_val) - 1

    with torch.no_grad():
        for i, batch in loop:
            features, labels = batch

            # forward
            preds = net(features)
            loss = loss_fn(preds, labels)

            # metrics
            step_metrics = {"val_"+name: metric_fn(preds, labels).item()
                            for name, metric_fn in val_metrics_dict.items()}
            step_log = dict({"val_loss": loss.item()}, **step_metrics)
            total_loss += loss.item()
            step += 1

            if i != last_step:
                loop.set_postfix(**step_log)
            else:
                epoch_loss = (total_loss/step)
                epoch_metrics = {"val_"+name: metric_fn.compute().item()
                                 for name, metric_fn in val_metrics_dict.items()}
                epoch_log = dict({"val_loss": epoch_loss}, **epoch_metrics)
                loop.set_postfix(**epoch_log)
                for name, metric_fn in val_metrics_dict.items():
                    metric_fn.reset()

    epoch_log["epoch"] = epoch
    for name, metric in epoch_log.items():
        history.setdefault(name, []).append(metric)

    # 3, early-stopping -------------------------------------------------
    arr_scores = history[monitor]
    best_score_idx = np.argmax(arr_scores) if mode == "max" else np.argmin(arr_scores)
    # A new best was set this epoch: checkpoint the weights.
    if best_score_idx == len(arr_scores)-1:
        torch.save(net.state_dict(), ckpt_path)
        print("<<<<<< reach best {0} : {1} >>>>>>".format(monitor,
              arr_scores[best_score_idx]), file=sys.stderr)
    # Too many epochs since the best score: stop training.
    if len(arr_scores)-best_score_idx > patience:
        print("<<<<<< {} without improvement in {} epoch, early stopping >>>>>>".format(
            monitor, patience), file=sys.stderr)
        break

# Restore the best checkpoint once training ends; placed after the loop so it
# also runs when early stopping breaks out.
net.load_state_dict(torch.load(ckpt_path))

dfhistory = pd.DataFrame(history)
使用GPU训练:约8s一个Epoch。
import torch
from torch import nn
from torch.utils.data import Dataset,DataLoader
from torchvision import transforms as T
from torchvision import datasets
#======================================================================
# 1. Prepare the data
#======================================================================
# Image pipeline applied to every sample: a single ToTensor step.
transform_img = T.Compose([T.ToTensor()])
def transform_label(x):
    """Wrap a label value in a tensor.

    Used as the ``target_transform`` of the ``ImageFolder`` datasets in this
    script, so every label reaches the loss function as a tensor.

    Args:
        x: The label value to wrap.

    Returns:
        The result of ``torch.tensor(x)``.
    """
    return torch.tensor(x)
# Train and validation splits are read from sibling folders and share the
# same image and label transforms.
ds_train = datasets.ImageFolder(
    "../input/eat-pytorch-datasets/eat_pytorch_datasets/cifar2/train/",
    target_transform=transform_label,
    transform=transform_img,
)
ds_val = datasets.ImageFolder(
    "../input/eat-pytorch-datasets/eat_pytorch_datasets/cifar2/test/",
    target_transform=transform_label,
    transform=transform_img,
)

# Show the mapping from class folder name to integer label.
print(ds_train.class_to_idx)

# Options common to both loaders; only the training loader shuffles.
_loader_opts = dict(batch_size=50, pin_memory=True, num_workers=8)
dl_train = DataLoader(ds_train, shuffle=True, **_loader_opts)
dl_val = DataLoader(ds_val, shuffle=False, **_loader_opts)
%matplotlib inline
%config InlineBackend.figure_format = 'svg'
# Preview the first nine training samples in a 3x3 grid.
from matplotlib import pyplot as plt

plt.figure(figsize=(8,8))
for i in range(9):
    img,label = ds_train[i]
    # Move the first axis (channels) to the end before plotting.
    img = img.permute(1,2,0)
    ax = plt.subplot(3,3,i+1)
    ax.imshow(img.numpy())
    ax.set_title("label = %d"%label.item())
    # Hide the axis ticks; they carry no information for image tiles.
    ax.set_xticks([])
    ax.set_yticks([])
plt.show()
# PyTorch image batches are laid out as (Batch, Channel, Height, Width).
# Print the shapes of the first batch only, then stop iterating.
for features,labels in dl_train:
    print(features.shape,labels.shape)
    break
#======================================================================
# 2. Define the model
#======================================================================
class Net(nn.Module):
    """Small convolutional classifier producing two output scores.

    Forward order: conv(3->32, k=3) -> max-pool(2) -> conv(32->64, k=5) ->
    max-pool(2) -> Dropout2d(p=0.1) -> adaptive max-pool to 1x1 -> flatten ->
    linear(64->32) -> ReLU -> linear(32->2).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3)
        # One pooling layer instance is reused after both convolutions.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5)
        self.dropout = nn.Dropout2d(p=0.1)
        # Collapses each of the 64 feature maps to a single value, so the
        # linear head always receives 64 features.
        self.adaptive_pool = nn.AdaptiveMaxPool2d((1, 1))
        self.flatten = nn.Flatten()
        self.linear1 = nn.Linear(64, 32)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(32, 2)

    def forward(self, x):
        """Return the two raw class scores for each sample in ``x``.

        No softmax is applied here; the training code feeds the output
        directly to ``nn.CrossEntropyLoss``.
        """
        x = self.conv1(x)
        x = self.pool(x)
        x = self.conv2(x)
        x = self.pool(x)
        x = self.dropout(x)
        x = self.adaptive_pool(x)
        x = self.flatten(x)
        x = self.linear1(x)
        x = self.relu(x)
        x = self.linear2(x)
        return x


net = Net()
print(net)
#======================================================================
# 三,训练模型(GPU)
#======================================================================
import os,sys,time
import numpy as np
import pandas as pd
import datetime
from tqdm import tqdm
import torch
from torch import nn
from copy import deepcopy
from torchmetrics import Accuracy
#注:多分类使用torchmetrics中的评估指标,二分类使用torchkeras.metrics中的评估指标
def printlog(info):
    """Print a timestamped separator line followed by ``info``.

    Output is a blank line, then 80 ``=`` characters immediately followed by
    the current local time (``YYYY-mm-dd HH:MM:SS``), then ``str(info)`` and
    a trailing blank line.

    Args:
        info: Any object; it is converted with ``str`` before printing.
    """
    nowtime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print("\n" + "=========="*8 + "%s" % nowtime)
    print(str(info) + "\n")
# Fresh model, loss, optimizer and metrics for this training run.
net = Net()
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.01)
# NOTE(review): newer torchmetrics releases appear to require an explicit
# ``task`` argument for ``Accuracy`` -- confirm against the installed version.
metrics_dict = {"acc": Accuracy()}

# ------------------------- move the model to the GPU -------------------------
# Uses the first CUDA device when available, otherwise stays on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net.to(device)
loss_fn.to(device)
# The metric objects are moved too, since they are called on tensors that
# live on ``device``.
for name, fn in metrics_dict.items():
    fn.to(device)
# -----------------------------------------------------------------------------

epochs = 5
ckpt_path = 'checkpoint.pt'

# Early-stopping settings: watch ``monitor``; ``mode`` says whether larger or
# smaller is better; stop after ``patience`` epochs without a new best.
monitor = "val_acc"
patience = 3
mode = "max"

# Metric name -> list holding one value per completed epoch.
history = {}

for epoch in range(1, epochs+1):
    printlog("Epoch {0} / {1}".format(epoch, epochs))

    # 1, train -------------------------------------------------
    net.train()
    total_loss, step = 0, 0
    loop = tqdm(enumerate(dl_train), total=len(dl_train))
    # Work on a copy so each epoch starts from untouched metric state.
    train_metrics_dict = deepcopy(metrics_dict)
    last_step = len(dl_train) - 1

    for i, batch in loop:
        features, labels = batch

        # ----------------------- move the batch to the GPU -----------------------
        features = features.to(device)
        labels = labels.to(device)
        # -------------------------------------------------------------------------

        # forward
        preds = net(features)
        loss = loss_fn(preds, labels)

        # backward
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # metrics
        step_metrics = {"train_"+name: metric_fn(preds, labels).item()
                        for name, metric_fn in train_metrics_dict.items()}
        step_log = dict({"train_loss": loss.item()}, **step_metrics)
        total_loss += loss.item()
        step += 1

        if i != last_step:
            loop.set_postfix(**step_log)
        else:
            # Last batch: show epoch-level aggregates on the progress bar.
            epoch_loss = total_loss/step
            epoch_metrics = {"train_"+name: metric_fn.compute().item()
                             for name, metric_fn in train_metrics_dict.items()}
            epoch_log = dict({"train_loss": epoch_loss}, **epoch_metrics)
            loop.set_postfix(**epoch_log)
            for name, metric_fn in train_metrics_dict.items():
                metric_fn.reset()

    for name, metric in epoch_log.items():
        history.setdefault(name, []).append(metric)

    # 2, validate -------------------------------------------------
    net.eval()
    total_loss, step = 0, 0
    loop = tqdm(enumerate(dl_val), total=len(dl_val))
    val_metrics_dict = deepcopy(metrics_dict)
    last_step = len(dl_val) - 1

    with torch.no_grad():
        for i, batch in loop:
            features, labels = batch

            # --------------------- move the batch to the GPU ---------------------
            features = features.to(device)
            labels = labels.to(device)
            # ---------------------------------------------------------------------

            # forward
            preds = net(features)
            loss = loss_fn(preds, labels)

            # metrics
            step_metrics = {"val_"+name: metric_fn(preds, labels).item()
                            for name, metric_fn in val_metrics_dict.items()}
            step_log = dict({"val_loss": loss.item()}, **step_metrics)
            total_loss += loss.item()
            step += 1

            if i != last_step:
                loop.set_postfix(**step_log)
            else:
                epoch_loss = (total_loss/step)
                epoch_metrics = {"val_"+name: metric_fn.compute().item()
                                 for name, metric_fn in val_metrics_dict.items()}
                epoch_log = dict({"val_loss": epoch_loss}, **epoch_metrics)
                loop.set_postfix(**epoch_log)
                for name, metric_fn in val_metrics_dict.items():
                    metric_fn.reset()

    epoch_log["epoch"] = epoch
    for name, metric in epoch_log.items():
        history.setdefault(name, []).append(metric)

    # 3, early-stopping -------------------------------------------------
    arr_scores = history[monitor]
    best_score_idx = np.argmax(arr_scores) if mode == "max" else np.argmin(arr_scores)
    # A new best was set this epoch: checkpoint the weights.
    if best_score_idx == len(arr_scores)-1:
        torch.save(net.state_dict(), ckpt_path)
        print("<<<<<< reach best {0} : {1} >>>>>>".format(monitor,
              arr_scores[best_score_idx]), file=sys.stderr)
    # Too many epochs since the best score: stop training.
    if len(arr_scores)-best_score_idx > patience:
        print("<<<<<< {} without improvement in {} epoch, early stopping >>>>>>".format(
            monitor, patience), file=sys.stderr)
        break

# Restore the best checkpoint once training ends; placed after the loop so it
# also runs when early stopping breaks out.
net.load_state_dict(torch.load(ckpt_path))

dfhistory = pd.DataFrame(history)
扫码关注腾讯云开发者
领取腾讯云代金券
Copyright © 2013 - 2025 Tencent Cloud. All Rights Reserved. 腾讯云 版权所有
深圳市腾讯计算机系统有限公司 ICP备案/许可证号:粤B2-20090059 深公网安备号 44030502008569
腾讯云计算(北京)有限责任公司 京ICP证150476号 | 京ICP备11018762号 | 京公网安备号11010802020287
Copyright © 2013 - 2025 Tencent Cloud.
All Rights Reserved. 腾讯云 版权所有