FOS (Fisher optimal segmentation) is an ordered clustering method: the samples keep their original order, and a partition may only cut the sequence into contiguous segments.
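Concretely (stated here in the usual textbook form rather than quoted from the code), the diameter D(i, j) of a segment is its within-segment sum of squared deviations from the segment mean, and the optimal k-class partition of the first n samples satisfies the Fisher recursion

L(n, k) = min over k-1 <= j < n of [ L(j, k-1) + D(j, n) ]

The code below computes L(n, 2) by brute force over the single cut point and then builds L(n, 3), L(n, 4), ... on top of it with memoization.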
import numpy as np
def normalizeRows(xx):
    '''
    Standardize each row of the matrix to zero mean and unit variance.
    xx: ndarray of shape (n_samples, n_features)
    return: ndarray of the same shape
    '''
    norm = []
    for i in range(xx.shape[0]):
        mean = np.mean(xx[i])
        std = np.std(xx[i])
        norm.append(1.0 * (xx[i] - mean) / std)  # a constant row (std == 0) would divide by zero
    return np.array(norm)
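# Quick check (illustrative, not from the original post): each row is standardized
# to zero mean and unit variance.
# >>> normalizeRows(np.array([[1.0, 2.0, 3.0]]))
# array([[-1.22474487,  0.        ,  1.22474487]])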
# ================================
def sampleD(x):
    '''
    Diameter of a segment: the sum of squared deviations of its rows
    from the segment mean vector.
    x: ndarray of shape (segment_length, n_features)
    return: scalar
    '''
    meanVector = np.mean(x, axis=0)
    D = 0
    for i in x:
        D += (meanVector - i) @ (meanVector - i)
    return D
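# Worked example (illustrative, not from the original post): for the segment
# [[0.], [2.]] the mean vector is [1.], so the diameter is (1-0)^2 + (1-2)^2 = 2.
# >>> sampleD(np.array([[0.0], [2.0]]))
# 2.0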
def loss(x, interval):
    '''
    Total loss of a partition: the sum of the diameters of its segments.
    x: the normalized matrix, ndarray
    interval: list of [start, stop] pairs, one per class (half-open row ranges)
    return: scalar
    '''
    total = 0
    for start, stop in interval:
        total += sampleD(x[start:stop])
    return total
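# Example (illustrative): loss(x, [[0, 2], [2, 5]]) scores the partition whose
# first class is rows 0-1 and whose second class is rows 2-4.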
class FOP():
    '''
    Fisher optimal partition (ordered clustering).
    LP[n, k] caches the minimal loss of splitting the first n+1 rows into k classes;
    PATH[n][k] caches the corresponding interval list. Previously computed tables
    (e.g. from ../data) could be loaded here instead of starting from zeros.
    '''
    def __init__(self, x, n_classes=4):
        self.LP = np.zeros((x.shape[0], n_classes + 1))
        self.PATH = np.zeros((x.shape[0], n_classes + 1)).tolist()
    def FOP_2(self, xx, n_num, n_classes=2, intv=True):
        '''
        Find the optimal 2-class Fisher partition of the first n_num rows.
        xx: the normalized matrix, ndarray
        n_num: number of leading rows to partition
        n_classes: fixed to 2; kept so the cache columns line up
        intv: if True, also return the interval list
        return: (best interval list, minimal loss), or just the minimal loss
        '''
        n_num = n_num - 1
        if self.LP[n_num, n_classes] != 0:  # already cached (a true zero loss is recomputed, which is harmless)
            return self.PATH[n_num][n_classes], self.LP[n_num, n_classes]
        x = xx[0:n_num + 1].copy()
        length = x.shape[0]
        minLoss = np.inf
        bestInterval = []
        for i in range(1, length):  # i is the single cut point
            interval = [[0, i], [i, length]]
            tempLoss = loss(x, interval)
            if tempLoss < minLoss:
                minLoss = tempLoss
                bestInterval = interval
        self.LP[n_num, n_classes] = minLoss         # cache the loss ...
        self.PATH[n_num][n_classes] = bestInterval  # ... and the partition path
        if intv:
            return bestInterval, minLoss
        else:
            return minLoss
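    # Usage sketch (hypothetical values): split the first 10 normalized rows into 2 classes.
    # >>> fop = FOP(x, n_classes=2)
    # >>> best_interval, best_loss = fop.FOP_2(x, 10)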
    def FOP_3(self, xx, n_num, n_classes=3, intv=True):
        '''
        Find the optimal 3-class Fisher partition of the first n_num rows:
        try every position i of the last cut and reuse the cached optimal
        2-class partition of the prefix x[0:i].
        xx: the normalized matrix, ndarray
        intv: if True, also return the interval list
        return: (best interval list, minimal loss), or just the minimal loss
        '''
        n_num = n_num - 1
        if self.LP[n_num, n_classes] != 0:  # already cached
            return self.PATH[n_num][n_classes], self.LP[n_num, n_classes]
        x = xx[0:n_num + 1].copy()
        length = x.shape[0]
        minLoss = np.inf
        bestInterval = []
        for i in range(3 - 1, length):  # last cut point; at least 2 rows remain on the left
            interval = [[0, i], [i, length]]
            prefixInterval, minLoss_old = self.FOP_2(x, i)
            tempLoss = loss(x, [interval[1]]) + minLoss_old
            if tempLoss < minLoss:
                minLoss = tempLoss
                bestInterval = interval
                bestPrefix = prefixInterval
        bestInterval[0] = bestPrefix  # nest the 2-class split of the winning prefix
        self.LP[n_num, n_classes] = minLoss
        self.PATH[n_num][n_classes] = bestInterval
        if intv:
            return bestInterval, minLoss
        else:
            return minLoss
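    # The returned bestInterval therefore nests the lower-order solution, e.g. a
    # 3-class result looks roughly like [[[0, 2], [2, 5]], [5, 20]]: the last class
    # is rows 5..19 and the prefix x[0:5] is itself optimally split into two classes.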
    def FOP_N(self, xx, n_num, n_classes, intv=True):
        '''
        Find the optimal n_classes-class Fisher partition (n_classes > 2):
        try every position i of the last cut and reuse the cached optimal
        (n_classes-1)-class partition of the prefix x[0:i].
        xx: the normalized matrix, ndarray
        intv: if True, also return the interval list
        return: (best interval list, minimal loss), or just the minimal loss
        '''
        n_num = n_num - 1
        if self.LP[n_num, n_classes] != 0:  # already cached
            return self.PATH[n_num][n_classes], self.LP[n_num, n_classes]
        x = xx[0:n_num + 1].copy()
        length = x.shape[0]
        minLoss = np.inf
        bestInterval = []
        for i in range(n_classes - 1, length):
            print('I', end='')  # progress indicator
            interval = [[0, i], [i, length]]
            # recurse on the prefix with one class fewer (direct dispatch, no eval needed)
            if n_classes - 1 > 3:
                prefixInterval, minLoss_old = self.FOP_N(x, i, n_classes - 1)
            elif n_classes - 1 == 3:
                prefixInterval, minLoss_old = self.FOP_3(x, i)
            else:
                prefixInterval, minLoss_old = self.FOP_2(x, i)
            tempLoss = loss(x, [interval[1]]) + minLoss_old
            if tempLoss < minLoss:
                minLoss = tempLoss
                bestInterval = interval
                bestPrefix = prefixInterval
        bestInterval[0] = bestPrefix  # nest the (n_classes-1)-class split of the winning prefix
        self.LP[n_num, n_classes] = minLoss
        self.PATH[n_num][n_classes] = bestInterval
        if n_num > 400 or n_num % 10 == 0:
            print(n_num, ':', bestInterval, end=':')  # coarse progress log
        if intv:
            return bestInterval, minLoss
        else:
            return minLoss
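# Design note: with the LP/PATH memoization each (n_num, n_classes) sub-problem is
# solved only once, but loss(x, [interval[1]]) still recomputes the diameter of the
# tail segment for every candidate cut. Precomputing a table of segment diameters
# D[i][j] up front would remove that repeated work; the recursion itself follows the
# standard Fisher dynamic programme sketched above.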
# test
x=np.array([
[0.1,0.3,0.6],
[0.5,0.7,0.8],
[2,3,6],
[10,20,14],
[10,20,14],
[1000,1002,1020],
[100,200,200],
[100,200,200],
[100,200,200],
[100,200,200],
[100,200,200],
[100,200,200],
[100,200,200],
[100,200,200],
[100,200,200],
[100,200,200],
[100,200,200],
[100,200,200],
[100,200,200],
[100,200,200]
# [2,3,6,1,4,9],
# [5,7,8,5,3,7],
# [2,3,6,1,4,9],
# [5,7,8,5,3,7],
# [2,3,6,1,4,9],
# [5,7,8,5,3,7],
# [2,3,6,1,4,9],
# [5,7,8,5,3,7]
])
x = normalizeRows(x)
N_CLASS = 6
fop = FOP(x, n_classes=N_CLASS)
print(fop.FOP_N(x, x.shape[0], n_classes=N_CLASS, intv=True))
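FOP_N returns a pair (bestInterval, minLoss). bestInterval is a nested list of [start, stop) row ranges into the normalized matrix: the outermost level holds the last segment, and the left element recursively contains the optimal partition of the remaining prefix into one class fewer, so the final class boundaries can be read off by flattening the nesting.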
Copyright notice: this is an original article by weixin_44504525, released under the CC 4.0 BY-SA license; please include a link to the original and this notice when reposting.