基于PyTorch搭建CNN实现视频动作分类任务2022年10月6日
项目介绍 简单实训项目:datafountain.cn
里面有具体的项目说明,并且可以下载数据集。
项目路径
文章图片
- datasets是在官网下载的数据包,
文章图片
继承dataset,适配pytorch的dataloader
import torch.nn as nn
from PIL import Image
import os
from torch.utils.data import DataLoader, sampler, Dataset# 继承Module类,使用pytorch中设计好的Dataloader作为我们的数据的加载器。
#
# 该加载器能够根据设定在每一次请求时自动加载一批训练数据,
#
# 能够自主实现多线程加载,能够在快速加载的同时尽可能的节省内存开销。# 而Dataloader类所加载的数据必须是pytorch中定义好的Dataset类,
#
# 所以我们的第一步,就是将我们的数据封装成一个Dataset类。class BasketballDataset(Dataset):
def __init__(self, root_dir, labels=[], transform=None):
super(BasketballDataset, self).__init__()
self.root_dir = root_dir
self.labels = labels
self.transform = transform
self.length = len(os.listdir(self.root_dir))def __len__(self):
return self.length * 3# 因为此数据集一个视频有三张图片def __getitem__(self, idx):
folder = idx // 3 + 1# 得到文件夹名非0部分
imidex = idx % 3 + 1# 得到每个文件夹下图片的编号1,2,3
# 文件夹,命名的文件夹名为5为数字,如00001,所以要进行格式转变
folder = format(folder, '05d')
imgname = str(imidex) + '.jpg'
img_path = os.path.join(self.root_dir, folder, imgname)
image = Image.open(img_path)Label = 0
# test集没有标签
if len(self.labels) != 0:
Label = self.labels[idx // 3][0] - 1
if self.transform:
image = self.transform(image)
if len(self.labels) != 0:
sample = {'image': image, 'img_path': img_path, "Label": Label}
else:
sample = {'image': image, 'img_path': img_path}
return sample
pre.py
【python|基于PyTorch搭建CNN实现视频动作分类任务】? 数据读取
from torch.utils.data import DataLoader
import torchvision.transforms as T
import scipy.iofrom basketball.basketballDataset import BasketballDatasetdef get_dataloader(name):# 保存的label的标签
label_mat = scipy.io.loadmat('../datasets/q3_2_data.mat')
label_train = label_mat['trLb']
print('train_len:', len(label_train))
label_val = label_mat['valLb']
print('val_len:', len(label_val))if name == 'train':
dataset_train = BasketballDataset(root_dir='../datasets/trainClips',
labels=label_train,
transform=T.ToTensor())dataloader_train = DataLoader(dataset_train,
batch_size=32,
shuffle=True,
num_workers=4)
return dataloader_trainif name == 'val':
dataset_val = BasketballDataset(root_dir='../datasets/valClips',
labels=label_val,
transform=T.ToTensor())dataloader_val = DataLoader(dataset_val,
batch_size=32,
shuffle=True,
num_workers=4)
return dataloader_val
if name == 'test':
dataset_test = BasketballDataset(root_dir='../datasets/testClips',
transform=T.ToTensor())dataloader_test = DataLoader(dataset_test,
batch_size=32,
shuffle=True,
num_workers=4)
return dataloader_testif __name__ == '__main__':# 保存的label的标签
label_mat = scipy.io.loadmat('../datasets/q3_2_data.mat')
label_train = label_mat['trLb']
print('train_len:', len(label_train))
label_val = label_mat['valLb']
print('val_len:', len(label_val))# Dataloader类所加载的数据必须是pytorch中定义好的Dataset类,
# 所以我们的第一步,就是将我们的数据封装成一个Dataset类。
dataset = BasketballDataset(root_dir='../datasets/trainClips',
labels=label_train,
transform=T.ToTensor())# Dataloader
dataloader = DataLoader(dataset,
batch_size=4,
shuffle=True,
num_workers=4)for i in range(3):
sample = dataset[i]
print(sample['image'].shape)
print(sample['Label'])
print(sample['img_path'])for i, sample in enumerate(dataloader):
print(i, sample['image'].shape, sample['img_path'], sample['Label'])
if i > 5:
break
model.py
? 继承nn.Module,编写网络
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader, sampler, Dataset
import torchvision.datasets as dset
import torchvision.transforms as T
import timeit
from PIL import Image
import os
import numpy as np
import scipy.io
import torchvision.models.inception as inceptionclass Cnn(nn.Module):
def __init__(self, channel=3):
super(Cnn, self).__init__()
self.sequential = nn.Sequential(
nn.Conv2d(channel, 8, kernel_size=7, stride=1),
nn.BatchNorm2d(8),
nn.ReLU(inplace=True),
nn.MaxPool2d(2, stride=2),
nn.Conv2d(8, 16, 7, 1),
nn.BatchNorm2d(16),
nn.ReLU(inplace=True),
nn.MaxPool2d(2, stride=2),
nn.Flatten(),
nn.ReLU(inplace=True),
nn.Linear(16 * 11 * 11, 10)
)def forward(self, x):
return self.sequential(x)if __name__ == '__main__':
model = Cnn()
model = model
x = torch.randn(32, 3, 64, 64)
x_var = Variable(x)# 需要将其封装为Variable类型。
outputs = model(x_var)
print(np.array(outputs.size()))# 检查模型输出。
np.array_equal(np.array(outputs.size()), np.array([32, 10]))
train.py
? 模型训练
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variablefrom basketball.pre import get_dataloader
from basketball.model import Cnndtype = torch.FloatTensor# 这是pytorch所支持的cpu数据类型中的浮点数类型。print_every = 100# 这个参数用于控制loss的打印频率,因为我们需要在训练过程中不断的对loss进行检测。model = Cnn()
loss_fun = nn.CrossEntropyLoss()
opt_fun = optim.RMSprop(model.parameters(), lr=0.0001)def train(epoch):
dataloader_train = get_dataloader('train')
dataloader_val = get_dataloader('val')
model.train()
for i in range(epoch):
for idx, sample in enumerate(dataloader_train):
img = Variable(sample['image'])
labels = Variable(sample['Label'].long())output = model(img)
loss = loss_fun(output, labels)opt_fun.zero_grad()
loss.backward()
opt_fun.step()if idx % 100 == 0:
model.eval()
acc_count = 0.0
all_count = 0
with torch.no_grad():
for idx, sample in enumerate(dataloader_val):
img = Variable(sample['image'])
labels = Variable(sample['Label'].long())
output = model(img)_, pre = torch.max(output, dim=1)
acc_count += (pre == labels).sum().item()
all_count += labels.size()[0]print('第{}轮,第{}次,loss为: {},验证集准确率为:{}'.format(i + 1, idx + 1, loss.item(), acc_count / all_count))if __name__ == '__main__':
train(1)
torch.save(model.state_dict(), '../model/basketball.pth')
predict.py
? 模型预测test数据集,并保存到result.csv文件中
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader, sampler, Dataset
import torchvision.datasets as dset
import torchvision.transforms as T
import timeit
from PIL import Image
import os
import numpy as np
import scipy.io
import torchvision.models.inception as inceptionfrom basketball.model import Cnn
from basketball.pre import get_dataloaderdef predict():
dataloader_test = get_dataloader('test')
model = Cnn()
model.load_state_dict(torch.load('../model/basketball.pth'))write_csv = open('../result.csv', 'w')
count = 0
write_csv.write('Id' + ',' + 'Class' + '\n')model.eval()
for t, sample, in enumerate(dataloader_test):
img = Variable(sample['image'])
output = model(img)
_, pre = torch.max(output, dim=1)
for i in range(len(pre)):
write_csv.write(str(count) + ',' + str(pre[i].item()) + '\n')
count += 1write_csv.close()
return countif __name__ == '__main__':
count = predict()
print(count)
预测结果
文章图片
推荐阅读
- python|stn在mnist上的实现
- B站|B站马士兵python入门基础版详细笔记(5)
- flask|阿里云宝塔部署python-flask项目
- python|python爬虫实战演示
- python|day04 循环练习题
- Debug生涯|seaborn urllib.error.URLError: < urlopen error [WinError 10054] 远程主机强迫关闭了一个现有的连接。>
- python|使用python-Django创建Web站点
- 分类|基于vgg16的猫狗识别(二分类)
- 数学建模的知识|数学建模 层次分析法 python计算权重