下面是一个基于 TextCNN 和 LSTM 的招聘简历自动筛选系统的 Python 代码示例:
“`python import numpy as np from tensorflow.keras.preprocessing.text import Tokenizer from tensorflow.keras.preprocessing.sequence import pad_sequences from tensorflow.keras.layers import Dense, Input, LSTM, Embedding, Dropout, Activation, Conv1D, GRU, CuDNNGRU, CuDNNLSTM, BatchNormalization from tensorflow.keras.layers import Bidirectional, GlobalMaxPool1D, MaxPooling1D, Add, Flatten from tensorflow.keras.layers import GlobalAveragePooling1D, GlobalMaxPooling1D, concatenate, SpatialDropout1D from tensorflow.keras.models import Model, load_model from tensorflow.keras import initializers, regularizers, constraints, optimizers, layers, callbacks from tensorflow.keras import backend as K from tensorflow.keras.engine import InputSpec, Layer from tensorflow.keras.optimizers import Adam
# 读取数据
# Load the labelled resumes. Each line of resumes.txt is expected to hold the
# resume text and an integer label separated by a tab.
resumes = []
labels = []
# NOTE(review): UTF-8 is assumed so the read does not depend on the platform's
# default encoding — confirm against how resumes.txt was exported.
with open("resumes.txt", "r", encoding="utf-8") as f:
    for line in f:
        record = line.strip()
        if not record:
            # Blank lines (e.g. a trailing newline at EOF) carry no sample.
            continue
        fields = record.split("\t")  # split once, reuse for text and label
        resumes.append(fields[0])
        labels.append(int(fields[1]))
# 划分训练集和测试集
# Hold out 20% of the samples as a test set; the fixed seed keeps the split
# reproducible between runs.
# NOTE(review): train_test_split is not among the imports visible in this file —
# presumably sklearn.model_selection.train_test_split; confirm it is imported.
(X_train, X_test, y_train, y_test) = train_test_split(
    resumes,
    labels,
    random_state=42,
    test_size=0.2,
)
# 数据预处理
# Vocabulary cap and fixed sequence length; both are read again by the model
# definition further down.
max_features, maxlen = 20000, 100

# Fit the vocabulary on the training texts only, then encode both splits with
# that same tokenizer (training split first, test split second).
tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(X_train)
X_train, X_test = (
    tokenizer.texts_to_sequences(texts) for texts in (X_train, X_test)
)

# Pad/truncate the encoded sequences to `maxlen` entries each.
x_train, x_test = (
    pad_sequences(seqs, maxlen=maxlen) for seqs in (X_train, X_test)
)
# 构建模型
def build_model(): inp = Input(shape=(maxlen,)) x = Embedding(max_features, 128)(inp) x = SpatialDropout1D(0.2)(x) x = Bidirectional(CuDNNLSTM(64, return_sequences=True))(x) y = Bidirectional(CuDNNGRU(64, return_sequences=True))(x) avg_pool1 = GlobalAveragePooling1D()(y) max_pool1 = GlobalMaxPooling1D()(y) conc = concatenate([avg_