目录
2.textrank源码中UndirectWeightedGraph类方法分解解析
(2)添加边的函数def addEdge(self, start, end, weight)
(3)def rank(self)函数(个人觉得在这个无向有权图类中最重要的一部分)
3.textrank源码中TextRank(KeywordExtractor)类的代码分片解释
(2) def pairfilter(self, wp)函数
1.textrank源码解析
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import, unicode_literals
import sys
from operator import itemgetter
from collections import defaultdict
import jieba.posseg
from .tfidf import KeywordExtractor
from .._compat import *
class UndirectWeightedGraph:
d = 0.85
def __init__(self):
    """Create an empty graph.

    ``self.graph`` is an adjacency mapping from a node to the list of
    edges recorded for that node. Using ``defaultdict(list)`` means a
    node that has not been seen yet starts with an empty edge list, so
    callers can append without checking for the key first.
    """
    # NOTE(review): nodes are presumably the segmented words of the input
    # text -- confirm against the caller that builds the graph.
    self.graph = defaultdict(list)
def addEdge(self, start, end, weight):
    """Record an undirected weighted edge between ``start`` and ``end``.

    The edge is stored twice, once under each endpoint, so that looking
    up either node in ``self.graph`` yields the edge with that node in
    the first position of the tuple.

    :param start: one endpoint of the edge (used as a key of ``self.graph``)
    :param end: the other endpoint of the edge
    :param weight: weight attached to the edge
    """
    # use a tuple (start, end, weight) instead of an Edge object
    self.graph[start].append((start, end, weight))
    self.graph[end].append((end, start, weight))
def rank(self):
ws = defaultdict(float)#权值list表
outSum = defaultdict(float)
# 初始化各个结点的权值
# 统计各个结点的出度的次数之和
wsdef = 1.0 / (len(self.graph) or 1.0)
for n, out in self.graph.items():
ws[n] = wsdef
outSum[n] = sum((e[2] for e in out), 0.0)#e[2]是什么?
# this line for build stable iteration
sorted_keys = sorted(self.graph.keys())
# 遍历若干次
for x in xrange(10): # 10 iters
#遍历各个节点
for n in sorted
版权声明:本文为weixin_42168614原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。