在服务器上安装了Anaconda4。Anaconda附带了一大批常用的数据科学包(包括下面示例用到的scikit-learn),方便使用Python进行数据分析。
#!/usr/bin/python
# -*- coding: UTF-8
'''
DBSCAN算法测试程序
@author: 程序兔
@date: 2018-05-08
'''
from math import *
from sklearn.cluster import DBSCAN
def get_distance(array_1, array_2):
    """Return the great-circle distance in meters between two points.

    The distance is computed with the haversine formula on a sphere of
    radius 6378137 m.

    Args:
        array_1: First point as ``[longitude, latitude]`` in degrees.
        array_2: Second point, same layout as ``array_1``.

    Returns:
        The distance between the two points along the sphere, in meters.
    """
    lon_a, lat_a = array_1[0], array_1[1]
    lon_b, lat_b = array_2[0], array_2[1]

    rad_lat_a = radians(lat_a)
    rad_lat_b = radians(lat_b)
    delta_lat = rad_lat_a - rad_lat_b
    delta_lon = radians(lon_a) - radians(lon_b)

    # Haversine term; mathematically within [0, 1] for valid latitudes.
    haversine = (sin(delta_lat / 2) ** 2
                 + cos(rad_lat_a) * cos(rad_lat_b) * sin(delta_lon / 2) ** 2)
    # Floating-point rounding can push the term marginally above 1 for
    # near-antipodal points, which would make asin() raise ValueError.
    # A plain comparison (rather than min()) keeps NaN inputs propagating.
    if haversine > 1.0:
        haversine = 1.0

    earth_radius = 6378137
    return 2 * asin(sqrt(haversine)) * earth_radius
def create_coordinate_list():
    """Return the sample ``[longitude, latitude]`` points used by the demo.

    The points form three clusters plus two noise points. The third point
    of cluster 1 is deliberately placed near the end of the list rather
    than next to its cluster mates.

    Returns:
        A list of nine ``[longitude, latitude]`` pairs, in a fixed order.
    """
    return [
        # Cluster 1: points within 500 m of each other (third point below).
        [116.501146, 39.9915],
        [116.501452, 39.991002],
        # Cluster 2: two points within 500 m of each other.
        [116.528509, 39.995549],
        [116.530808, 39.99389],
        # Noise point.
        [116.419131, 40.024949],
        # Cluster 3: two points within 500 m of each other.
        [116.51913, 39.972384],
        [116.520747, 39.972025],
        # Third point of cluster 1, added out of order on purpose.
        [116.501685, 39.990491],
        # Noise point.
        [116.53075, 39.883273],
    ]
def main():
    """Cluster the sample coordinates with DBSCAN and print the labels."""
    # Sample coordinate data.
    coordinate_list = create_coordinate_list()
    # DBSCAN clustering. get_distance returns meters, so eps=500 is a
    # 500 m neighbourhood radius; min_samples=2 lets two points form a
    # cluster.
    dbscan = DBSCAN(eps=500, min_samples=2, metric=get_distance).fit(coordinate_list)
    # Expected output per the original author: [ 0 0 1 1 -1 2 2 0 -1]
    print(dbscan.labels_)
# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    main()
dbscan.labels_数组就是聚类簇索引列表:噪音点标记为-1,其他值代表各个簇的索引,顺序与传入的坐标点列表一一对应。由于get_distance返回的距离以米为单位,eps=500即表示500米的邻域半径。
版权声明:本文为wu_boy原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。