Python社会网络度与聚类系数的计算(networkx+原理法)

  • Post author:
  • Post category:python


一、借助包完成网络度与聚类系数的计算与可视化

Python的networkx包可以帮助我们计算网络的关键指标。networkx用于构建和操作复杂的图结构,并提供了多种图分析算法。本实验重点讲解基于networkx包的实现与实验1的不同之处。这部分不难,直接上代码。

# -*- coding: utf-8 -*-
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
# Work around Chinese text rendering problems in figures: select the SimHei
# font for sans-serif text and turn off the Unicode minus sign.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
# 生成网络
def readnetwork(network_filename):
    """Build an undirected graph from a TAB-separated edge-list file.

    Every non-blank line is split on TAB characters; the first two fields
    are used as the endpoints of one edge. The graph is drawn in a
    matplotlib window, and its nodes, edges and edge count are printed.

    Args:
        network_filename: Path of the text file holding one edge per line.

    Returns:
        The ``networkx.Graph`` containing all edges read from the file.

    Raises:
        IndexError: If a non-blank line has fewer than two TAB-separated
            fields.
    """
    G = nx.Graph()
    # The context manager guarantees the file handle is closed, even if a
    # malformed line raises while parsing.
    with open(network_filename) as network_file:
        for line in network_file:
            if not line.strip():
                # Blank lines (e.g. a trailing newline) carry no edge.
                continue
            edge = line.strip('\n').split('\t')
            G.add_edge(edge[0], edge[1])
    nx.draw(G, node_size=10, with_labels=True)  # visualise the network
    plt.show()
    print("网络的全部节点依次为:", G.nodes())
    print("网络的全部边依次为:", G.edges())
    print("网络边的数量:", G.number_of_edges())
    return G
# 根据一个列表求每个值的个数及所占比例,绘制折线图
def percent(list, x_label, y_label, title):
    """Print and plot how often each distinct value occurs.

    The distinct values are reported in ascending order together with their
    occurrence counts, then plotted as a line chart with the value on the
    x axis and its share of all items on the y axis.

    Args:
        list: Iterable of mutually comparable values to tally. The name
            shadows the builtin but is kept so keyword callers still work.
            The argument itself is not modified.
        x_label: Label for the x axis.
        y_label: Label for the y axis.
        title: Title of the figure.

    Returns:
        None. Results are printed and shown in a matplotlib window.
    """
    from itertools import groupby

    # Grouping a sorted copy tallies every distinct value in one pass; this
    # replaces a ``list.count`` call per element (quadratic) and leaves the
    # caller's data in its original order.
    list_new = [
        (value, sum(1 for _ in group))
        for value, group in groupby(sorted(list))
    ]
    total = sum(count for _, count in list_new)
    print("列表元素及对应出现次数为", list_new)
    print("列表元素出现次数的总和为", total)
    element_list = [value for value, _ in list_new]
    element_percent = [count / total for _, count in list_new]
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.plot(element_list, element_percent, linewidth=1)
    plt.title(title)
    plt.show()
    print('-------------------------------------------------')

print('本程序运行结果为借助networkx包完成的节点度和节点聚类系数的计算')

# Load the network from the edge-list file.
G = readnetwork('us 500.txt')

# Degree of every node, printed as returned and sorted in ascending order.
degree = nx.degree(G)
print("网络的节点度数", degree)
print('排序后为', sorted(degree))
print('-------------------------------------------------')

# Degree distribution: keep only the degree from each (node, degree) pair.
degree_num = [node_degree for _, node_degree in degree]
percent(degree_num, '度数', '度数概率', '度分布概率')

# Clustering coefficient of every node.
cluster = nx.clustering(G)
print("网络节点聚类系数为:", cluster)
print('-------------------------------------------------')

# Distribution of the clustering coefficients.
clustering_coefficient = list(cluster.values())
percent(clustering_coefficient, '聚类系数', '聚类系数概率', '聚类系数分布概率')

# Mean clustering coefficient of the whole network.
mean_clustering = np.mean(list(cluster.values()))
print("网络聚类系数的均值为", mean_clustering)

二、借助原理法完成网络度和聚类系数的计算与可视化

import matplotlib.pyplot as plt
import numpy as np
# Work around Chinese text rendering problems in figures: select the SimHei
# font for sans-serif text and turn off the Unicode minus sign.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
def percent(list, x_label, y_label, title):
    """Print and plot how often each distinct value occurs.

    The distinct values are reported in ascending order together with their
    occurrence counts, then plotted as a line chart with the value on the
    x axis and its share of all items on the y axis.

    Args:
        list: Iterable of mutually comparable values to tally. The name
            shadows the builtin but is kept so keyword callers still work.
            The argument itself is not modified.
        x_label: Label for the x axis.
        y_label: Label for the y axis.
        title: Title of the figure.

    Returns:
        None. Results are printed and shown in a matplotlib window.
    """
    from itertools import groupby

    # Grouping a sorted copy tallies every distinct value in one pass; this
    # replaces a ``list.count`` call per element (quadratic) and leaves the
    # caller's data in its original order.
    list_new = [
        (value, sum(1 for _ in group))
        for value, group in groupby(sorted(list))
    ]
    total = sum(count for _, count in list_new)
    print("列表元素及对应出现次数为", list_new)
    print("列表元素出现次数的总和为", total)
    element_list = [value for value, _ in list_new]
    element_percent = [count / total for _, count in list_new]
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.plot(element_list, element_percent, linewidth=1)
    plt.title(title)
    plt.show()
    print('-------------------------------------------------')
print('本程序运行结果为按照原理完成的节点度和节点聚类系数的计算')

# Read the edge-list file; the context manager closes the handle afterwards.
with open('us 500.txt') as network_file:
    network = network_file.readlines()

# Parse one edge per line: drop the newline and split on TAB characters.
# The first two fields are the endpoints of the edge.
edge_list = []
for line in network:
    if not line.strip():
        # Blank lines (e.g. a trailing newline) carry no edge.
        continue
    edge_list.append(line.strip('\n').split('\t'))

# Build the neighbour set of every node. Using sets collapses repeated or
# reversed duplicates of an edge, so the results describe a simple
# undirected graph. Self-loops are tracked separately so that a node never
# appears among its own neighbours.
adjacency = {}
self_loop_nodes = set()
for edge in edge_list:
    left, right = edge[0], edge[1]
    adjacency.setdefault(left, set())
    adjacency.setdefault(right, set())
    if left == right:
        self_loop_nodes.add(left)
    else:
        adjacency[left].add(right)
        adjacency[right].add(left)
node_list = list(adjacency)

# Degree of a node = number of distinct neighbours; a self-loop adds two,
# the usual graph-theory convention (also used by networkx's Graph.degree).
degree = sorted(
    (node, len(neighbours) + (2 if node in self_loop_nodes else 0))
    for node, neighbours in adjacency.items()
)
print('经过排序的节点度数为', degree)

# Degree distribution: keep only the degree from each (node, degree) pair.
degree_num = [node_degree for _, node_degree in degree]
percent(degree_num, '度数', '度数概率', '度分布概率')

# Clustering coefficient of a node = links that exist among its neighbours
# divided by the n * (n - 1) / 2 links that could exist among them.
cluster_dict = {}
for node in node_list:
    neighbours = adjacency[node]
    n = len(neighbours)
    if n < 2:
        # Fewer than two neighbours: no pair can be linked, coefficient 0.
        cluster_dict[node] = 0
        continue
    sum_edge = n * (n - 1) / 2
    # Each link between two neighbours is seen once from either endpoint,
    # hence the halving.
    real_edge = sum(
        len(adjacency[other] & neighbours) for other in neighbours
    ) // 2
    cluster_dict[node] = real_edge / sum_edge
print('节点的聚类系数依次为', cluster_dict)

# Distribution of the clustering coefficients.
clustering_coefficient = list(cluster_dict.values())
percent(clustering_coefficient, '聚类系数', '聚类系数概率', '聚类系数分布概率')

# Mean clustering coefficient of the whole network.
print("网络聚类系数的均值为", np.mean(list(cluster_dict.values())))



版权声明:本文为weixin_46874423原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。