import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.svm import SVC
from sklearn.cross_validation import cross_val_score
from sklearn.metrics import classification_report
input_file = 'building_event_binary.txt'

# Load the raw comma-separated records; every field is kept as a string here.
X = []
with open(input_file, 'r') as f:
    for line in f:
        # Strip only the line terminator. Slicing off the last character
        # unconditionally would corrupt a final line with no trailing newline.
        line = line.rstrip('\r\n')
        if not line:
            # A blank line would yield a one-field row and make the array ragged.
            continue
        data = line.split(',')
        # Keep field 0 and fields 2 onward; field 1 is not used.
        X.append([data[0]] + data[2:])
if not X:
    # Fail early with a clear message instead of an IndexError further down.
    raise ValueError('no data rows found in ' + input_file)
X = np.array(X)
# Convert the string table into a numeric one, column by column.
# One LabelEncoder is created per non-numeric column, in column order, and
# kept in `label_encoder` so the same mappings can be reused later.
label_encoder = []
X_encoded = np.empty(X.shape)
for col, sample in enumerate(X[0]):
    # The first row decides the column type: a non-digit entry there means
    # the whole column is treated as categorical.
    if not sample.isdigit():
        encoder = preprocessing.LabelEncoder()
        label_encoder.append(encoder)
        X_encoded[:, col] = encoder.fit_transform(X[:, col])
    else:
        X_encoded[:, col] = X[:, col]

# The last column is the target; everything before it is the feature matrix.
y = X_encoded[:, -1].astype(int)
X = X_encoded[:, :-1].astype(int)
# Build and fit the SVM model (RBF kernel, probability estimates enabled,
# class weights balanced).
params = {'kernel': 'rbf', 'probability': True, 'class_weight': 'balanced'}
classifier = SVC(**params)
classifier.fit(X, y)

# Cross-validation: mean accuracy over 3 folds, printed as a percentage.
accuracy = cross_val_score(classifier, X, y, scoring='accuracy', cv=3)
print(round(100 * accuracy.mean(), 2))

# Performance evaluation on the data the model was fitted on.
y_predict = classifier.predict(X)
# Score against the true labels `y`. Scoring against `y_predict` compares the
# predictions with themselves and always reports 1.0.
print(classifier.score(X, y))
print(classification_report(y, y_predict))
# Encode a single sample with the encoders fitted above and classify it.
input_data = ['Tuesday', '12:30:00', '21', '23']
input_data_encoded = [-1] * len(input_data)  # placeholder, one slot per field
# `count` indexes into `label_encoder`: encoders were appended in column
# order, so the n-th non-numeric field uses the n-th encoder.
count = 0
for i, item in enumerate(input_data):
    if item.isdigit():
        input_data_encoded[i] = int(item)
    else:
        # transform() expects a 1-D array-like, so wrap the single value in a
        # list and take the first (only) encoded result.
        input_data_encoded[i] = int(label_encoder[count].transform([item])[0])
        count = count + 1
input_data_encoded = np.array(input_data_encoded)

# Output the result.
# predict() expects a 2-D array of shape (n_samples, n_features); reshape the
# single sample into one row.
output_class = classifier.predict(input_data_encoded.reshape(1, -1))
# The last encoder is the one fitted on the target column, so it maps the
# predicted integer back to its label. The [0] indexes the decoded array;
# the original indexed the return value of print().
print('Output class', label_encoder[-1].inverse_transform(output_class)[0])
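# Copyright notice: this is an original article by u012967763, released under the CC 4.0 BY-SA license. When reposting, include a link to the original source and this notice.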