import numpy as np
import torch
from torch.utils.data import Dataset,DataLoader
import torch.nn.functional as F #relu
import torch.optim as optim
import pandas as pd
# Convert class names to integer class ids
def convert(label):
    """Translate Otto class names into zero-based integer class ids.

    Args:
        label: Iterable of strings, each one of 'Class_1' ... 'Class_9'.

    Returns:
        A list of ints in the same order as ``label``; 'Class_1' maps to 0
        and 'Class_9' maps to 8.

    Raises:
        ValueError: If an element of ``label`` is not one of the nine names.
    """
    target = ['Class_1', 'Class_2', 'Class_3', 'Class_4', 'Class_5',
              'Class_6', 'Class_7', 'Class_8', 'Class_9']
    # list.index keeps the ValueError raised for unknown labels.
    return [target.index(name) for name in label]
# 1. Prepare the dataset
class Otto(Dataset):
    """In-memory dataset built from an Otto training CSV.

    The CSV is read with pandas. Every column except the first and the last
    is kept as a feature (presumably these two are the id and the label
    columns -- confirm against the CSV). The 'target' column is turned into
    integer class ids by ``convert``.
    """

    def __init__(self, filepath):
        """Read the CSV at ``filepath`` and cache features and labels."""
        xy = pd.read_csv(filepath)
        self.len = xy.shape[0]  # number of rows in the CSV
        # The tensor keeps whatever dtype pandas inferred for the columns;
        # no cast is applied here.
        self.x_data = torch.from_numpy(xy.iloc[:, 1:-1].values)
        self.y_data = convert(xy['target'])

    def __getitem__(self, index):
        """Return the ``(features, class_id)`` pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of rows that were read from the CSV."""
        return self.len
# Load the training CSV once, then serve it in shuffled mini-batches of 32
# from the main process (num_workers=0).
dataset = Otto(
    'E:/study/PyTorch/otto-group-product-classification-challenge/train.csv'
)
train_loader = DataLoader(
    dataset,
    batch_size=32,
    shuffle=True,
    num_workers=0,
)
# 2. Build the model
class Model(torch.nn.Module):
    """Fully connected classifier with layer sizes 93-64-32-16-9."""

    def __init__(self):
        super().__init__()
        self.l1 = torch.nn.Linear(93, 64)
        self.l2 = torch.nn.Linear(64, 32)
        self.l3 = torch.nn.Linear(32, 16)
        self.l4 = torch.nn.Linear(16, 9)

    def forward(self, x):
        """Return the raw output of the last layer for a batch ``x``.

        ReLU is applied after the first three layers only; the last layer's
        output is returned without an activation.
        """
        x = F.relu(self.l1(x))
        x = F.relu(self.l2(x))
        x = F.relu(self.l3(x))
        return self.l4(x)

    def test(self, x):
        """Predict one class per row of ``x`` and one-hot encode it.

        Args:
            x: Float tensor whose last dimension is 93.

        Returns:
            A pandas DataFrame with one row per input row and one integer
            0/1 column per class (columns 0..8). All nine columns are always
            present, including classes that were never predicted.
        """
        with torch.no_grad():
            # Reuse the forward pass so prediction sees the same logits as
            # training. Applying ReLU to the logits would clamp negative
            # scores to 0 and can turn the arg-max into a tie.
            logits = self(x)
            predicted = logits.argmax(dim=1)  # index of the largest logit
            one_hot = F.one_hot(predicted, num_classes=self.l4.out_features)
        return pd.DataFrame(one_hot.cpu().numpy())
model = Model()

# 3. Loss and optimizer
# Cross-entropy loss on the model output, minimised with SGD plus momentum.
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(
    params=model.parameters(),
    lr=0.01,
    momentum=0.5,
)
# Training function
def train(epoch, log_interval=300):
    """Run one pass over ``train_loader`` and update ``model`` in place.

    Uses the module-level ``train_loader``, ``model``, ``criterion`` and
    ``optimizer``.

    Args:
        epoch: Zero-based epoch number; only used in the progress message.
        log_interval: Number of batches between progress messages. Each
            message prints the mean loss of the batches since the last one.
            Batches after the final full interval are not reported.
    """
    running_loss = 0.0
    for batch_idx, (inputs, target) in enumerate(train_loader):
        # Cast to float32 before feeding the batch to the model.
        inputs = inputs.float()
        optimizer.zero_grad()
        # forward + backward + update
        outputs = model(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if batch_idx % log_interval == log_interval - 1:
            print('[%d,%5d] loss:%.3f' % (epoch + 1, batch_idx + 1, running_loss / log_interval))
            running_loss = 0.0
# 4. Train
if __name__ == '__main__':
    for epoch in range(10):
        train(epoch)

    # Predict the rows of the test CSV
    test_dataset = pd.read_csv('E:/study/PyTorch/otto-group-product-classification-challenge/test.csv')
    # Every column after the first one is fed to the model.
    x_test = torch.from_numpy(test_dataset.iloc[:, 1:].values)
    y_test = model.test(x_test.float())

    target = ['Class_1', 'Class_2', 'Class_3', 'Class_4', 'Class_5',
              'Class_6', 'Class_7', 'Class_8', 'Class_9']
    # Guard against a prediction frame that lacks a column for some class
    # or that holds boolean indicators. Force nine integer 0/1 columns so
    # the renaming below cannot fail on a length mismatch and the CSV
    # contains numbers.
    y_test = y_test.reindex(columns=range(len(target)), fill_value=0).astype(int)
    y_test.columns = target

    # Join the ids and the predictions into a single DataFrame
    output = pd.concat([test_dataset[['id']], y_test], axis=1)
    output.to_csv('otto_predict.csv', index=False)
# 预测结果部分数据:
# 参考博客:
# 《PyTorch深度学习实践》-刘二大人 Otto Group Product Classification作业
# 都看到这里了不在评论区夸我一下嘛(●’◡’●)
# 版权声明:本文为weixin_60189029原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。