This article walks through the third implementation of the intelligent scoring system in concrete code, showing how the network is built and trained.
First, we construct the complete network: it contains not only the encoder part introduced earlier, but also the decoding half that produces the final output.
Since the encoded sentence vectors are 768-dimensional, the final fully connected MLP head uses several layers that reduce the dimensionality step by step before emitting the result.
The code is as follows:
import torch
import torch.nn as nn
import torch.nn.functional as F
from utils import get_max_hidden

class NetBiLSTM(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, layer_num, out_dim):
        super(NetBiLSTM, self).__init__()
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.layer_num = layer_num
        self.out_dim = out_dim
        # Two independent BiLSTM encoders: one for the student-answer
        # vectors (x) and one for the reference-answer vectors (ax).
        self.BiLSTMx = nn.LSTM(input_size=embedding_dim, hidden_size=hidden_dim,
                               num_layers=layer_num, bidirectional=True,
                               batch_first=True)
        self.BiLSTMax = nn.LSTM(input_size=embedding_dim, hidden_size=hidden_dim,
                                num_layers=layer_num, bidirectional=True,
                                batch_first=True)
        # Initial hidden/cell states, sized for a batch of 20:
        # (num_directions * layer_num, batch, hidden_dim).
        self.h0, self.c0 = (torch.randn(2 * layer_num, 20, hidden_dim),
                            torch.randn(2 * layer_num, 20, hidden_dim))
        self.h1, self.c1 = (torch.randn(2 * layer_num, 20, hidden_dim),
                            torch.randn(2 * layer_num, 20, hidden_dim))
        # Decoder MLP: 4*hidden_dim (two pooled BiLSTM outputs of
        # 2*hidden_dim each) -> hidden_dim -> 300 -> out_dim.
        self.LinearSta = nn.Linear(in_features=4 * hidden_dim, out_features=hidden_dim)
        self.LinearMid = nn.Linear(hidden_dim, 300)
        self.LinearEnd = nn.Linear(300, out_dim)
        self.Dropout = nn.Dropout(p=0.3)

    def forward(self, input_ax, input_x):
        out_bilstmx, (h0, c0) = self.BiLSTMx(input_x, (self.h0, self.c0))
        out_bilstmax, (h1, c1) = self.BiLSTMax(input_ax, (self.h1, self.c1))
        # Detach the carried-over states so the next backward pass does not
        # try to propagate through the previous batch's graph.
        self.h0, self.c0 = h0.detach(), c0.detach()
        self.h1, self.c1 = h1.detach(), c1.detach()
        # Pool each BiLSTM output over the time dimension, then
        # concatenate the two pooled vectors.
        in_linearstax = get_max_hidden(out_bilstmx)
        in_linearstaax = get_max_hidden(out_bilstmax)
        in_v = torch.cat((in_linearstax, in_linearstaax), dim=1)
        out_linearsta = self.LinearSta(in_v)
        in_linearmid = self.Dropout(F.relu(out_linearsta))
        out_linearmid = self.LinearMid(in_linearmid)
        in_linearend = self.Dropout(F.relu(out_linearmid))
        out = self.LinearEnd(in_linearend)
        return out
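The helper get_max_hidden imported from utils is not shown here. Judging from the shapes above (each pooled BiLSTM output must be 2*hidden_dim wide so that the concatenation is 4*hidden_dim), it is presumably a max-over-time pooling; a minimal sketch under that assumption:

import torch

def get_max_hidden(lstm_out):
    # lstm_out: (batch, seq_len, 2 * hidden_dim) from a bidirectional LSTM
    # with batch_first=True. Max-pool over the time dimension so every
    # sequence collapses into a single (2 * hidden_dim) vector.
    return torch.max(lstm_out, dim=1).values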
Before the data enters the BiLSTM network, a pad operation is needed: each student answer contains a different number of sentences, so every answer is padded (or truncated) to a fixed maximum length.
def get_padded_vector(vec_list_x, max_x_len=8):
    # Pad or truncate each answer to exactly max_x_len sentence vectors.
    embed_feature = 768  # dimension of each sentence vector
    vec_x_padded = []
    for vec in vec_list_x:
        data = [vec_i for vec_i in vec]
        if len(data) >= max_x_len:
            # Too long: keep only the first max_x_len sentences.
            data = data[:max_x_len]
        else:
            # Too short: append all-zero vectors up to max_x_len.
            for i in range(max_x_len - len(data)):
                data = data + [[0 for t in range(embed_feature)]]
        vec_x_padded.append(data)
    return vec_x_padded
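For example (a toy check of the function above), an answer with 2 sentence vectors is padded up to 8 rows and one with 10 is truncated to 8:

import torch

vecs = [[[0.1] * 768 for _ in range(2)],    # 2-sentence answer -> padded
        [[0.2] * 768 for _ in range(10)]]   # 10-sentence answer -> truncated
padded = get_padded_vector(vecs, max_x_len=8)
print(torch.tensor(padded).size())          # torch.Size([2, 8, 768])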
Before the data is fed into the network it also has to be divided into batches, which is done with a slicing operation. The helper below first loads the stored training vectors and applies the same padding; a sketch of the slicing itself follows it.
from saveVec import load_data_to_vec_list  # assumed to live in saveVec, like the loaders used below

def get_padded_x(max_x_len=11):
    # Load the saved training vectors and pad them, as above.
    embed_feature = 768
    vec_list_x = load_data_to_vec_list('dataset/vec-train-5000.csv')
    vec_x_padded = []
    for vec in vec_list_x:
        data = [vec_i for vec_i in vec]
        if len(data) >= max_x_len:
            data = data[:max_x_len]
        else:
            for i in range(max_x_len - len(data)):
                data = data + [[0 for t in range(embed_feature)]]
        vec_x_padded.append(data)
    return vec_x_padded
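The batch slicing itself is done with view, as in the training code below. A short sketch with random data (the 3000-sample count matches the shards used below, where each of the 150 batches ends up holding 20 answers):

import torch

x = torch.randn(3000, 8, 768)        # 3000 padded answers
batches = x.view(150, -1, 8, 768)    # 150 mini-batches of 20 each
print(batches.size())                # torch.Size([150, 20, 8, 768])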
Finally, the end-to-end training is run. The same routine is applied to two data shards, alternating between them for ten rounds:
from saveVec import load_data_to_vec_list_together
from utils import get_padded_vector
import torch
import torch.nn as nn
from BM import NetBiLSTM
from HenYouShuoFa import ShuoFaObj

def train(epochi, csv_path):
    # Load the sentence vectors and their labels for this data shard.
    vec, label = load_data_to_vec_list_together(csv_path)
    sf = ShuoFaObj()
    # Build the reference-answer ("ax") input for every sample.
    vec_ax = []
    for i in vec:
        vec_ax.append(sf.get_ax(i))
    print(torch.tensor(vec_ax).size())
    # Pad every student answer to 8 sentence vectors.
    vec_x_pad = get_padded_vector(vec)
    print(torch.tensor(vec_x_pad).size())
    # Map the raw scores to class ids: 0 -> 0, 1.5 -> 1, anything else -> 2.
    label_id = []
    for li in label:
        if li == 0:
            label_id.append(0)
        elif li == 1.5:
            label_id.append(1)
        else:
            label_id.append(2)
    # Slice the samples into 150 mini-batches of 20.
    vec_ax_t = torch.tensor(vec_ax).view(150, -1, 5, 768)
    print(vec_ax_t.size())
    vec_x_pad_t = torch.tensor(vec_x_pad).view(150, -1, 8, 768)
    print(vec_x_pad_t.size())
    label_id_t = torch.tensor(label_id, dtype=torch.long).view(150, -1)
    print(label_id_t.size())
    # Resume from the shared checkpoint so successive calls keep
    # training the same parameters.
    Model = NetBiLSTM(768, 1000, 2, 3)
    # Model.load_state_dict(torch.load('params_batch5.pth'))
    Model.load_state_dict(torch.load('params_batch20_ex6000.pth'))
    Model.train()
    loss = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(Model.parameters(), lr=0.001)
    # optimizer = torch.optim.SGD(Model.parameters(), lr=0.001, momentum=0.9)
    for i in range(150):
        optimizer.zero_grad()
        pred = Model(vec_ax_t[i], vec_x_pad_t[i])
        loss_i = loss(pred, label_id_t[i])
        loss_i.backward()
        optimizer.step()
        if i % 10 == 0:
            print('epoch ' + str(epochi) + ', loss is ' + str(loss_i.item()))
    torch.save(Model.state_dict(), 'params_batch20_ex6000.pth',
               _use_new_zipfile_serialization=True)

# Alternate between the two data shards for ten rounds.
for i in range(10):
    train(i, 'vec1000_4000.csv')
    train(i, 'vec10000_13000.csv')
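Once training finishes, the saved parameters can be reloaded for scoring. A minimal inference sketch with dummy inputs of the training shapes (batch of 20):

import torch
from BM import NetBiLSTM

model = NetBiLSTM(768, 1000, 2, 3)
model.load_state_dict(torch.load('params_batch20_ex6000.pth'))
model.eval()  # disable dropout for inference

ax = torch.randn(20, 5, 768)   # stand-in for the reference-answer vectors
x = torch.randn(20, 8, 768)    # stand-in for the padded student answers
with torch.no_grad():
    logits = model(ax, x)
    print(logits.argmax(dim=1))  # predicted class ids (0, 1, or 2)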
With this, all three implementation approaches are complete. The following articles will analyze the results and introduce the programming techniques used along the way.