Intelligent Grading System: Implementing the Third Approach



This post walks through a concrete implementation of the third approach to the intelligent grading system, showing how the network is built and trained.

First, we build the complete network: it contains not only the encoder described in the earlier posts, but also the second half of the network, which decodes the encoded representation into the final output.

Since the encoder produces 768-dimensional sentence vectors, the final fully connected MLP uses several layers to reduce the dimensionality step by step before emitting the result.

The code is as follows:

import torch
import torch.nn as nn
import torch.nn.functional as F
from utils import get_max_hidden

class NetBiLSTM(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, layer_num, out_dim):
        super(NetBiLSTM, self).__init__()
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.layer_num = layer_num
        self.out_dim = out_dim

        # Two parallel BiLSTM encoders: one for the student answer (x),
        # one for the auxiliary sentence vectors (ax).
        self.BiLSTMx = nn.LSTM(input_size=embedding_dim, hidden_size=hidden_dim,
                               num_layers=layer_num, bidirectional=True,
                               batch_first=True)
        self.BiLSTMax = nn.LSTM(input_size=embedding_dim, hidden_size=hidden_dim,
                                num_layers=layer_num, bidirectional=True,
                                batch_first=True)

        # Initial hidden/cell states. The batch size (20) is fixed here and
        # must match the mini-batch size used during training.
        self.h0, self.c0 = (torch.randn(2 * layer_num, 20, hidden_dim),
                            torch.randn(2 * layer_num, 20, hidden_dim))
        self.h1, self.c1 = (torch.randn(2 * layer_num, 20, hidden_dim),
                            torch.randn(2 * layer_num, 20, hidden_dim))

        # MLP head: 4 * hidden_dim (two concatenated BiLSTM outputs of
        # 2 * hidden_dim each) -> hidden_dim -> 300 -> out_dim.
        self.LinearSta = nn.Linear(in_features=4 * hidden_dim, out_features=hidden_dim)
        self.LinearMid = nn.Linear(hidden_dim, 300)
        self.LinearEnd = nn.Linear(300, out_dim)

        self.Dropout = nn.Dropout(p=0.3)

    def forward(self, input_ax, input_x):
        out_bilstmx, (h0, c0) = self.BiLSTMx(input_x, (self.h0, self.c0))
        out_bilstmax, (h1, c1) = self.BiLSTMax(input_ax, (self.h1, self.c1))
        # Detach the carried-over states; otherwise the second backward()
        # would try to backpropagate through the previous batch's graph.
        self.h0, self.c0 = h0.detach(), c0.detach()
        self.h1, self.c1 = h1.detach(), c1.detach()

        # Max-pool each BiLSTM output over the time dimension.
        in_linearstax = get_max_hidden(out_bilstmx)
        in_linearstaax = get_max_hidden(out_bilstmax)

        # Concatenate the two pooled representations: (batch, 4 * hidden_dim).
        in_v = torch.cat((in_linearstax, in_linearstaax), dim=1)

        out_linearsta = self.LinearSta(in_v)
        in_linearmid = self.Dropout(F.relu(out_linearsta))

        out_linearmid = self.LinearMid(in_linearmid)
        in_linearend = self.Dropout(F.relu(out_linearmid))

        out = self.LinearEnd(in_linearend)
        return out
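The helper get_max_hidden is imported from utils but its body is not shown in this post. Given that LinearSta expects 4 * hidden_dim input features, i.e. two concatenated BiLSTM outputs of 2 * hidden_dim each, a plausible minimal implementation is a max-pool over the time dimension (an assumption, not the original code):

import torch

def get_max_hidden(lstm_out):
    # lstm_out: (batch, seq_len, 2 * hidden_dim), since batch_first=True
    # and the LSTM is bidirectional. Max-pooling over the time axis
    # collapses each sequence to a single (batch, 2 * hidden_dim) vector.
    return torch.max(lstm_out, dim=1).values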
        

Before the input reaches the BiLSTM, a padding step is needed: each student answer contains a different number of sentences, so every answer is padded (or truncated) to the maximum length.

def get_padded_vector(vec_list_x, max_x_len=8):
    embed_feature = 768  # dimension of each sentence vector
    vec_x_padded = []
    for vec in vec_list_x:
        data = list(vec)
        if len(data) >= max_x_len:
            # Too long: truncate to max_x_len sentence vectors.
            data = data[:max_x_len]
        else:
            # Too short: append all-zero 768-dim vectors.
            for i in range(max_x_len - len(data)):
                data = data + [[0 for t in range(embed_feature)]]
        vec_x_padded.append(data)
    return vec_x_padded
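A quick sanity check of the padding behavior (a toy example added here, not from the original post):

# Two fake "answers": one with 3 sentence vectors, one with 9.
fake = [[[0.1] * 768 for _ in range(3)],
        [[0.2] * 768 for _ in range(9)]]
padded = get_padded_vector(fake, max_x_len=8)
print(len(padded[0]), len(padded[1]))  # 8 8 -- short one zero-padded, long one truncated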

Before the data is fed to the network, it also has to be divided into batches, so a slicing step is performed. The helper below first loads the saved sentence vectors from disk and pads them; the batch split itself happens later via view() in the training script.

from saveVec import load_data_to_vec_list  # assumed to live in saveVec, like its *_together variant

def get_padded_x(max_x_len=11):
    embed_feature = 768
    # Load the pre-computed sentence vectors for the training set.
    vec_list_x = load_data_to_vec_list('dataset/vec-train-5000.csv')
    vec_x_padded = []
    for vec in vec_list_x:
        data = list(vec)
        if len(data) >= max_x_len:
            data = data[:max_x_len]
        else:
            # Pad short answers with all-zero 768-dim vectors.
            for i in range(max_x_len - len(data)):
                data = data + [[0 for t in range(embed_feature)]]
        vec_x_padded.append(data)
    return vec_x_padded
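The slicing itself then reduces to a view() over the padded tensor. A minimal sketch, assuming the file holds 3,000 answers padded to 8 sentence vectors each (the shapes the training code below expects):

import torch

x = torch.tensor(get_padded_x(max_x_len=8))  # assumed shape: (3000, 8, 768)
batches = x.view(150, -1, 8, 768)            # 150 mini-batches of 20: (150, 20, 8, 768)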

Finally, the full training run:

import os

import torch
import torch.nn as nn

from saveVec import load_data_to_vec_list_together, load_data
from utils import reshape_into_slices, get_padded_vector, label_to_1D
from BM import NetBiLSTM
from HenYouShuoFa import ShuoFaObj

def train(epochi, csv_file):
    # The original train1/train2 differed only in the CSV they read,
    # so they are merged into this single parameterized function.
    vec, label = load_data_to_vec_list_together(csv_file)

    # get_ax() extracts the auxiliary sentence vectors for each answer;
    # judging by the view() below, it returns 5 vectors of dimension 768.
    sf = ShuoFaObj()
    vec_ax = []
    for i in vec:
        vec_ax.append(sf.get_ax(i))

    print(torch.tensor(vec_ax).size())

    vec_x_pad = get_padded_vector(vec)
    print(torch.tensor(vec_x_pad).size())

    # Map raw scores to class ids: 0 -> 0, 1.5 -> 1, everything else -> 2.
    label_id = []
    for li in label:
        if li == 0:
            label_id.append(0)
        elif li == 1.5:
            label_id.append(1)
        else:
            label_id.append(2)

    # Split the 3000 answers into 150 mini-batches of 20.
    vec_ax_t = torch.tensor(vec_ax).view(150, -1, 5, 768)
    print(vec_ax_t.size())
    vec_x_pad_t = torch.tensor(vec_x_pad).view(150, -1, 8, 768)
    print(vec_x_pad_t.size())
    label_id_t = torch.tensor(label_id, dtype=torch.long).view(150, -1)
    print(label_id_t.size())

    Model = NetBiLSTM(768, 1000, 2, 3)

    # Resume from the checkpoint if one exists (the original assumed it did).
    # Model.load_state_dict(torch.load('params_batch5.pth'))
    if os.path.exists('params_batch20_ex6000.pth'):
        Model.load_state_dict(torch.load('params_batch20_ex6000.pth'))
    Model.train()
    loss = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(Model.parameters(), lr=0.001)
    # optimizer = torch.optim.SGD(Model.parameters(), lr=0.001, momentum=0.9)

    for i in range(150):
        optimizer.zero_grad()
        pred = Model(vec_ax_t[i], vec_x_pad_t[i])
        loss_i = loss(pred, label_id_t[i])
        loss_i.backward()
        optimizer.step()

        if i % 10 == 0:
            print('epoch ' + str(epochi) + ', loss is ' + str(loss_i.item()))
            torch.save(Model.state_dict(), 'params_batch20_ex6000.pth',
                       _use_new_zipfile_serialization=True)

# Alternate between the two training files for 10 outer epochs.
for i in range(10):
    train(i, 'vec1000_4000.csv')
    train(i, 'vec10000_13000.csv')
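With the checkpoint saved, grading new answers is a standard eval-mode forward pass. A minimal sketch (an assumption, not shown in the original post), reusing the imports above and tensors prepared as in train():

model = NetBiLSTM(768, 1000, 2, 3)
model.load_state_dict(torch.load('params_batch20_ex6000.pth'))
model.eval()  # disables dropout
with torch.no_grad():
    logits = model(vec_ax_t[0], vec_x_pad_t[0])  # one mini-batch of 20 answers
    pred_class = logits.argmax(dim=1)            # class ids 0/1/2, per the mapping above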
    

With that, all three implementation approaches are complete. The following posts will analyze the results and introduce the programming techniques used along the way.



Copyright notice: this is an original article by hshshsjjs, released under the CC 4.0 BY-SA license. When reposting, please include a link to the original article and this notice.