Preface
I purely wanted to learn PyTorch's dataset classes; I'll probably need them later on.
A Brief Introduction
During training, besides writing the model and the training loop, we also need data loading, and the few official data-loading classes aren't enough to cover personal needs.
I wanted to load my own dataset of npy files, where each npy file has a corresponding target, so I decided to write a Dataset myself.
As the PyTorch documentation puts it, all other datasets should subclass Dataset, and every subclass should override __len__ and __getitem__:

- __len__ provides the size of the dataset
- __getitem__ supports integer indexing, ranging from 0 to len(self)
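In code form, the minimal skeleton looks something like this (just a sketch; MyDataset and its fields are made-up names):

from torch.utils.data import Dataset

class MyDataset(Dataset):
    def __init__(self, samples, targets):
        self.samples = samples    # anything indexable
        self.targets = targets

    def __len__(self):
        '''the size of the dataset'''
        return len(self.samples)

    def __getitem__(self, item):
        '''item runs over 0 .. len(self) - 1'''
        return self.samples[item], self.targets[item]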
Actually, while hammering out the code I felt something was off here: if __len__ reports the true count (say there really are 6 samples, so it returns 6 rather than 6-1), then "__getitem__ from 0 to len(self)" sounds like it would run past the end (counting from 0, the last index should be 5, not 6, i.e., not len(self)). The docs actually mean the half-open range [0, len(self)): the default sampler never asks for index len(self) itself. Still, to be on the safe side, my code simply trims one sample off the end of the dataset.
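A quick way to see the half-open range for yourself is a toy dataset that prints every index the DataLoader asks for (a throwaway sketch; ToyDataset is a made-up name):

import torch
from torch.utils.data import Dataset, DataLoader

class ToyDataset(Dataset):
    '''A 6-sample dataset that just reports which indices get requested.'''
    def __len__(self):
        return 6

    def __getitem__(self, item):
        print('requested index:', item)
        return torch.tensor(item)

for batch in DataLoader(ToyDataset(), batch_size=3):
    pass
# prints indices 0 through 5; len(self) itself (6) is never requested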
Show you the Code
class OFDataset(Dataset):
    def __init__(self, path_dataset, seq):
        self.path_OF = r'{}flow_data/'.format(path_dataset)
        self.path_pose = r'{}gt_poses_frames/'.format(path_dataset)
        self.seq = seq
        self.get_len_of_each_seq()

    def __getitem__(self, item):
        '''Supports integer indexing, from 0 to len(self) - 1'''
        path_index = self.decode(item)
        sample, target = self.loader(path_index)
        return sample, target

    def __len__(self):
        '''Provides the size of the dataset (trimmed by one, see above)'''
        return self.index_range[-1] - 1

    def get_len_of_each_seq(self):
        '''Gets the number of frames in each sequence'''
        self.len_seq = []  # e.g. [4540, 1100, ...]
        for i in self.seq:
            pose_name = r'{}{}.txt'.format(self.path_pose, i)
            with open(pose_name, 'r') as f:
                len_i = len(f.readlines())
            self.len_seq.append(len_i)
        # cumulative boundaries of the global index, e.g. [0, 4540, 5640, ...]
        self.index_range = [0]
        for j in range(len(self.len_seq)):
            self.index_range.append(self.index_range[j] + self.len_seq[j])

    def decode(self, item):
        '''Decodes a global item into the matching sequence (path) and local index'''
        path_index = {}
        for i in range(len(self.index_range) - 1):
            if self.index_range[i] <= item < self.index_range[i + 1]:
                path_index['path'] = self.seq[i]
                path_index['index'] = item - self.index_range[i]
                return path_index

    def loader(self, path_index):
        '''Loads the sample and target tensors for a decoded index'''
        sample_path = r'{}{}/{}-{}.npy'.format(
            self.path_OF,
            path_index['path'],
            path_index['index'],
            path_index['index'] + 1
        )
        sample = np.load(sample_path)
        sample = torch.from_numpy(sample)
        target_path = r'{}{}.txt'.format(
            self.path_pose,
            path_index['path']
        )
        with open(target_path, 'r') as f:
            readline = f.readlines()[path_index['index']]
        readline = readline.split(',')
        readline = list(map(float, readline))
        readline = np.array(readline)
        target = readline[:6]
        target = torch.from_numpy(target)
        return sample, target
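To make decode concrete, here is the index arithmetic spelled out on the illustrative counts from the comments above:

# illustrative numbers taken from the comments in the code above
index_range = [0, 4540, 5640]   # seq '00' has 4540 frames, '01' has 1100
item = 4600
# 4540 <= 4600 < 5640, so the sample belongs to the second sequence ('01')
# local index = 4600 - 4540 = 60 -> file 'flow_data/01/60-61.npy', line 60 of '01.txt'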
Across the whole class, the only three methods we really need to care about are the following:

- __init__(self, path_dataset, seq): the initializer; here I pass in the dataset path and the sequences to read
- __getitem__(self, item): one of the two essential methods (the other is right below). Both are mandatory and must return the appropriate data. This one returns sample and target (the sample and its label; for some subclasses a label is not required), and the returned data must be tensors, so they can be fed straight into the network for training
- __len__(self): exactly what it says, the total amount of data, as an int

The other methods in my code just do the groundwork for the two important ones above; they are commented, so I won't walk through them.
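One nice consequence of returning fixed-shape tensors from __getitem__ is that the DataLoader can batch them with its default collate function. A sketch, assuming an OFDataset built as in the full script below and flow maps of shape (2, 192, 640):

loader = DataLoader(OFDataset('dataset/kitti_odom/', ['00']), batch_size=8)
sample_batch, target_batch = next(iter(loader))
print(sample_batch.shape)   # e.g. torch.Size([8, 2, 192, 640])
print(target_batch.shape)   # torch.Size([8, 6])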
The Paths
The npy data are stored in folders 00 through 10 under the flow_data folder,
while the txt files in the gt_poses_frames folder hold the labels for the npy data in each of the [00, 01, 02, ..., 10] folders, one line per sample.
So the first line of 00.txt corresponds to the first npy file under flow_data/00/.
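Putting it together, the expected layout looks roughly like this (sequence and file names are illustrative):

dataset/kitti_odom/
├── flow_data/
│   ├── 00/
│   │   ├── 0-1.npy
│   │   ├── 1-2.npy
│   │   └── ...
│   └── 01/ ... 10/
└── gt_poses_frames/
    ├── 00.txt    # one line of comma-separated pose values per frame
    └── 01.txt ... 10.txt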
The Full Code
'''
@Author: Astrophil (luo19902567292@163.com)
@Date: 2022-03-18
@LastEditTime: 2022-03-23
@LastEditors: Astrophil
@Description:
'''
import numpy as np
import torch
from torch import nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
class OFDataset(Dataset):
    def __init__(self, path_dataset, seq):
        self.path_OF = r'{}flow_data/'.format(path_dataset)
        self.path_pose = r'{}gt_poses_frames/'.format(path_dataset)
        self.seq = seq
        self.get_len_of_each_seq()

    def __getitem__(self, item):
        '''Supports integer indexing, from 0 to len(self) - 1'''
        path_index = self.decode(item)
        sample, target = self.loader(path_index)
        return sample, target

    def __len__(self):
        '''Provides the size of the dataset (trimmed by one, see above)'''
        return self.index_range[-1] - 1

    def get_len_of_each_seq(self):
        '''Gets the number of frames in each sequence'''
        self.len_seq = []  # e.g. [4540, 1100, ...]
        for i in self.seq:
            pose_name = r'{}{}.txt'.format(self.path_pose, i)
            with open(pose_name, 'r') as f:
                len_i = len(f.readlines())
            self.len_seq.append(len_i)
        # cumulative boundaries of the global index, e.g. [0, 4540, 5640, ...]
        self.index_range = [0]
        for j in range(len(self.len_seq)):
            self.index_range.append(self.index_range[j] + self.len_seq[j])

    def decode(self, item):
        '''Decodes a global item into the matching sequence (path) and local index'''
        path_index = {}
        for i in range(len(self.index_range) - 1):
            if self.index_range[i] <= item < self.index_range[i + 1]:
                path_index['path'] = self.seq[i]
                path_index['index'] = item - self.index_range[i]
                return path_index

    def loader(self, path_index):
        '''Loads the sample and target tensors for a decoded index'''
        sample_path = r'{}{}/{}-{}.npy'.format(
            self.path_OF,
            path_index['path'],
            path_index['index'],
            path_index['index'] + 1
        )
        sample = np.load(sample_path)
        sample = torch.from_numpy(sample)
        target_path = r'{}{}.txt'.format(
            self.path_pose,
            path_index['path']
        )
        with open(target_path, 'r') as f:
            readline = f.readlines()[path_index['index']]
        readline = readline.split(',')
        readline = list(map(float, readline))
        readline = np.array(readline)
        target = readline[:6]
        target = torch.from_numpy(target)
        return sample, target
# A conv layer, ReLU, and pooling layer can usually be bundled into one block.
# Note that the conv output is still a feature map; it has to be flattened
# into a vector before a classification or regression head.
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(          # input size (2, 192, 640)
            nn.Conv2d(
                in_channels=2,               # optical flow has 2 channels
                out_channels=16,             # number of feature maps to produce
                kernel_size=5,               # kernel size
                stride=1,                    # stride
                padding=2,                   # to keep the spatial size, padding=(kernel_size-1)/2 if stride=1
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),     # 2x2 pooling, output (16, 96, 320)
        )
        self.conv2 = nn.Sequential(          # [16, 96, 320]
            nn.Conv2d(
                in_channels=16,
                out_channels=32,
                kernel_size=5,
                stride=1,
                padding=2,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),     # [32, 48, 160]
        )
        self.conv3 = nn.Sequential(          # [32, 48, 160]
            nn.Conv2d(
                in_channels=32,
                out_channels=64,
                kernel_size=5,
                stride=1,
                padding=2,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),     # [64, 24, 80]
        )
        self.fc1 = nn.Linear(in_features=64*24*80, out_features=1024)
        self.fc2 = nn.Linear(in_features=1024, out_features=6)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = x.view(-1, 64*24*80)             # flatten: (batch_size, 64*24*80)
        fc1_out = F.relu(self.fc1(x))
        fc2_out = self.fc2(fc1_out)          # no ReLU here: pose targets can be negative
        return fc2_out
'''dataset'''
path_dataset = 'dataset/kitti_odom/'
seq_train = ['00', '01', '03', '10']
seq_test = ['02', '04']
dataset_train = OFDataset(path_dataset, seq_train)
data_loader_train = DataLoader(dataset_train, batch_size=8)
dataset_test = OFDataset(path_dataset, seq_test)
data_loader_test = DataLoader(dataset_test, batch_size=8)
train_on_gpu = torch.cuda.is_available()
if not train_on_gpu:
    print('CUDA is not available. Training on CPU...')
else:
    print('CUDA is available! Training on GPU...')
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

'''Accuracy as an evaluation metric (a classification metric; unused in this regression script)'''
def accuracy(predictions, labels):
    pred = torch.max(predictions.data, 1)[1]
    rights = pred.eq(labels.data.view_as(pred)).sum()
    return rights, len(labels)
'''Training the network'''
# instantiate
net = CNN()
net = net.to(device)
# loss function
# criterion = nn.CrossEntropyLoss()
criterion = nn.MSELoss()
# optimizer
optimizer = optim.Adam(net.parameters(), lr=0.001)

num_epochs = 50
for epoch in range(num_epochs):
    loss_list = []
    print('begin epoch {}'.format(epoch))
    net.train()
    for batch_idx, (data, target) in enumerate(data_loader_train):
        data = data.to(device)
        target = target.to(device)
        output = net(data)
        output = output.to(torch.float32)
        target = target.to(torch.float32)
        loss = criterion(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if batch_idx % 20 == 0:
            print(epoch, batch_idx)
            print(loss)
        loss_list.append(loss.item())
    print('loss: {}'.format(loss_list[-1]))
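As a quick sanity check that the hard-coded 64*24*80 in fc1 really matches the conv stack for a (2, 192, 640) input, you can push a dummy tensor through the three blocks (a throwaway snippet, not part of the training script):

with torch.no_grad():
    dummy = torch.zeros(1, 2, 192, 640).to(device)
    feat = net.conv3(net.conv2(net.conv1(dummy)))
    print(feat.shape)   # expected: torch.Size([1, 64, 24, 80])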
The CNN class in there was thrown together at random, just to test whether the Dataset class can actually load data.
The loss it produces looks strange; my guess is that it's because the six regression ground-truth values in the labels were never normalized.
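If unnormalized targets really are the culprit, one common fix is to standardize the six pose values with statistics computed once over the training set. A minimal sketch (it assumes all targets fit in memory; t_mean and t_std are names I made up):

# gather every training target once and compute per-dimension statistics
all_targets = torch.stack([dataset_train[i][1] for i in range(len(dataset_train))])
t_mean = all_targets.mean(dim=0).to(device)
t_std = all_targets.std(dim=0).to(device)

# inside the training loop, normalize the target before the loss:
# loss = criterion(output, (target - t_mean) / t_std)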
Remarks
I'm a complete deep-learning beginner, so there may well be mistakes; corrections from the experts are very welcome.
Oh, and I might upload this to git later, haha.