分类|LeNet网络模型——CIFAR-10数据集进行分类

CIFAR-10数据集由10个类别、共60000张32×32的彩色图像组成,每个类别有6000张图像。其中50000张为训练图像,10000张为测试图像。
数据集分为五个训练批次和一个测试批次,下面采用卷积神经网络对数据集进行分类。
model.py

import torch.nn as nn
import torch.nn.functional as F

# PyTorch tensors are laid out as [batch, channel, height, width].
# Output size of a convolution along one spatial dimension:
#     N = (W - F + 2P) / S + 1
# where (1) W is the input size (W x W image), (2) F is the kernel size
# (F x F kernel), (3) S is the stride and (4) P is the padding.


class LeNet(nn.Module):
    """LeNet-style convolutional classifier for 3-channel 32x32 images.

    The network is two conv + max-pool stages followed by three fully
    connected layers. The first fully connected layer expects 32*5*5
    features per sample, which is what a 32x32 input produces after the
    two conv/pool stages.

    Args:
        num_classes: Number of output logits. Defaults to 10, matching the
            ten-class CIFAR-10 task this model was written for.
    """

    def __init__(self, num_classes: int = 10) -> None:
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, 5)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, 5)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(32 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        # One output per class; no softmax here, the layer returns raw logits.
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return class logits of shape [batch, num_classes] for input ``x``.

        Shape comments below omit the batch dimension and assume a
        (3, 32, 32) input.
        """
        x = F.relu(self.conv1(x))   # input (3, 32, 32) -> output (16, 28, 28)
        x = self.pool1(x)           # output (16, 14, 14): pooling halves H and W
        x = F.relu(self.conv2(x))   # output (32, 10, 10)
        x = self.pool2(x)           # output (32, 5, 5)
        x = x.view(-1, 32 * 5 * 5)  # flatten each sample to 32*5*5 features
        x = F.relu(self.fc1(x))     # output (120)
        x = F.relu(self.fc2(x))     # output (84)
        x = self.fc3(x)             # output (num_classes)
        return x

train.py
import torch
import torchvision
import torch.nn as nn
from model import LeNet
import torch.optim as optim
import torchvision.transforms as transforms


def main():
    """Train LeNet on CIFAR-10 for 5 epochs and save its weights.

    Every 500 training steps the running training loss and the accuracy on
    a fixed validation batch (the first 5000 images of the test split) are
    printed. After training, the model's ``state_dict`` is written to
    ``./Lenet.pth``.

    The dataset is read from ``./cifar10-data`` and is NOT downloaded
    automatically; set ``download=True`` on first use.
    """
    # Image preprocessing: convert to a tensor, then normalize each channel.
    #   normalize:    output = (input - 0.5) / 0.5
    #   un-normalize: input  = output * 0.5 + 0.5 = output / 2 + 0.5
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    # 50000 training images.
    # Set download=True the first time so the dataset is fetched automatically.
    train_set = torchvision.datasets.CIFAR10(root='./cifar10-data', train=True,
                                             download=False, transform=transform)
    # Reshuffle every epoch so mini-batches are not presented in the same
    # fixed order each time.
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=36,
                                               shuffle=True, num_workers=0)

    # 10000 validation images.
    # Set download=True the first time so the dataset is fetched automatically.
    val_set = torchvision.datasets.CIFAR10(root='./cifar10-data', train=False,
                                           download=False, transform=transform)
    val_loader = torch.utils.data.DataLoader(val_set, batch_size=5000,
                                             shuffle=False, num_workers=0)

    # Only the first batch (5000 images) is used for validation. Use the
    # built-in next(): iterators have no .next() method in Python 3.
    val_data_iter = iter(val_loader)
    val_image, val_label = next(val_data_iter)

    # classes = ('plane', 'car', 'bird', 'cat',
    #            'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # The validation batch never changes, so move it to the device once
    # instead of on every evaluation.
    val_image, val_label = val_image.to(device), val_label.to(device)

    net = LeNet()
    net = net.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.001)  # Adam optimizer

    for epoch in range(5):  # loop over the dataset multiple times (5 epochs)

        running_loss = 0.0  # training loss accumulated since the last report
        # enumerate yields the step index together with each batch.
        for step, data in enumerate(train_loader, start=0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            # Zero the parameter gradients. This is needed for every batch:
            # without it, gradients from earlier batches would be accumulated
            # into the current ones.
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = loss_function(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if step % 500 == 499:  # print every 500 mini-batches
                # No gradients are needed for evaluation, so disable
                # gradient tracking inside this context.
                with torch.no_grad():
                    outputs = net(val_image)  # [batch, 10]
                    # Index of the largest logit is the predicted class.
                    predict_y = torch.max(outputs, dim=1)[1]
                    # eq(...).sum() is a tensor; .item() extracts the number.
                    accuracy = torch.eq(predict_y, val_label).sum().item() / val_label.size(0)

                print('[%d, %5d] train_loss: %.3f  test_accuracy: %.3f' %
                      (epoch + 1, step + 1, running_loss / 500, accuracy))
                running_loss = 0.0

    print('Finished Training')

    save_path = './Lenet.pth'
    torch.save(net.state_dict(), save_path)


if __name__ == '__main__':
    main()

分类训练效果显示:
[1,   500] train_loss: 1.770  test_accuracy: 0.448
[1,  1000] train_loss: 1.452  test_accuracy: 0.515
[2,   500] train_loss: 1.268  test_accuracy: 0.564
[2,  1000] train_loss: 1.172  test_accuracy: 0.597
[3,   500] train_loss: 1.063  test_accuracy: 0.622
[3,  1000] train_loss: 1.008  test_accuracy: 0.635
[4,   500] train_loss: 0.952  test_accuracy: 0.645
[4,  1000] train_loss: 0.910  test_accuracy: 0.649
[5,   500] train_loss: 0.871  test_accuracy: 0.655
[5,  1000] train_loss: 0.839  test_accuracy: 0.668

【分类|LeNet网络模型——CIFAR-10数据集进行分类】最好的效果为:测试准确率66.8%(第5轮第1000步)。

    推荐阅读