python情感分析包_情感分析snownlp包部分核心代码理解

snownlp 是用 Python 写的一个中文情感分析的包，自带了中文正负情感的训练集，主要是评论的语料库。它使用朴素贝叶斯原理来训练和预测数据。这里主要看了这个包的几个核心代码，阅读过程中作了一些注释，记录下来以免以后忘记。

1. sentiment文件夹下的__init__.py，主要是集成了前面写的几个模块的功能，进行打包。

1 #-*- coding: utf-8 -*-

2 from __future__ importunicode_literals3

4 importos5 importcodecs6

7 from .. importnormal8 from .. importseg9 from ..classification.bayes importBayes10

# Default model file: 'sentiment.marshal' located in the same directory as
# this module. Used as the default ``fname`` of ``Sentiment.load``.
data_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                         'sentiment.marshal')

15 classSentiment(object):16

def __init__(self):
    """Create the analyzer and attach a ``Bayes`` classifier to it.

    The ``Bayes`` instance is kept as ``self.classifier``; the other
    methods of this class do their work by calling methods on it.
    """
    self.classifier = Bayes()

def save(self, fname, iszip=True):
    """Save the final model by delegating to ``self.classifier.save``.

    Args:
        fname: Target file name, passed through unchanged.
        iszip: Passed through unchanged; presumably selects a compressed
            on-disk format (confirm against ``Bayes.save``).
    """
    self.classifier.save(fname, iszip)

def load(self, fname=data_path, iszip=True):
    """Load a Bayes model by delegating to ``self.classifier.load``.

    Args:
        fname: Model file to read; defaults to the module-level
            ``data_path``.
        iszip: Passed through unchanged; presumably indicates that the
            file is stored compressed (confirm against ``Bayes.load``).
    """
    self.classifier.load(fname, iszip)

# 分词以及去停用词的操作

def handle(self, doc):
    """Segment ``doc`` into words and filter out stop words.

    Args:
        doc: The text to process.

    Returns:
        The result of ``normal.filter_stop`` applied to the segmented
        words (a list, according to the original author's note).
    """
    words = seg.seg(doc)  # word segmentation
    words = normal.filter_stop(words)  # stop-word removal
    return words

32 deftrain(self, neg_docs, pos_docs):33 data =[]34 for sent in neg_docs:#读入负样本

35 data.append([self.handle(sent), ‘neg’])36 #所以可以看出进入bayes（）的训练的数据data格式是[[[第一行分词],类别],

37 #[[第二行分词], 类别]，

38 #[[第n行分词],类别]

39 #]

40 for sent in pos_docs: #读入正样本

41 data.append([self.handle(sent), ‘pos’])4