# NOTE: everything between the triple quotes below is a single string literal,
# so none of it is executed. It holds a disabled experiment kept for reference:
# fetch the LFW faces dataset, grid-search a PCA + SVC pipeline over
# `pca__n_components`, `svc__C` and `svc__gamma`, print the test score, the
# classification report and the confusion matrix, then plot a gallery of test
# images titled with true/predicted names, a gallery of the PCA components
# ("eigenfaces"), and the explained variance ratio of the best PCA step.
# NOTE(review): the quoted code has lost its indentation (the bodies of
# `gallery`, `title` and the `for` loop sit at column 0), so it must be
# re-indented before it can be re-enabled.
# NOTE(review): `iid=False` is passed to GridSearchCV; recent scikit-learn
# releases appear to no longer accept `iid` -- confirm before re-enabling.
'''
from sklearn import datasets, model_selection, svm, decomposition, pipeline, metrics
import matplotlib.pyplot as plt
lfw_people = datasets.fetch_lfw_people(min_faces_per_person=70, resize=.4)
n_images, h, w = lfw_people.images.shape
x = lfw_people.images.reshape((n_images, -1))
n_feature = x.shape[1]
print(n_feature)
target_names = lfw_people.target_names
n_class = len(target_names)
print(n_class)
y = lfw_people.target
x_train, x_test, y_train, y_test = model_selection.train_test_split(x, y, test_size=.25)
svc = svm.SVC(class_weight='balanced')
pca = decomposition.PCA(whiten=True, svd_solver='randomized')
pipe = pipeline.Pipeline([('pca', pca), ('svc', svc)])
gs = model_selection.GridSearchCV(pipe, {'pca__n_components': [8, 16, 24, 32, 48, 56, 64], 'svc__C': [1e3, 5e3, 1e4, 5e4, 1e5], 'svc__gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1]}, n_jobs=-1, cv=5, iid=False)
gs.fit(x_train, y_train)
print(gs.score(x_test, y_test))
y_pred = gs.predict(x_test)
print(metrics.classification_report(y_test, y_pred, target_names=target_names))
print(metrics.confusion_matrix(y_test, y_pred, labels=range(n_class)))
def gallery(titles, images, h, w, ncols=4, nrows=3):
plt.figure(figsize=(1.8*ncols, 2.4*nrows))
plt.subplots_adjust(hspace=.24, left=.01, right=.99, bottom=0)
for i in range(ncols*nrows):
plt.subplot(nrows, ncols, i+1)
plt.imshow(images[i].reshape((h, w)), cmap=plt.cm.gray)
plt.xticks(())
plt.yticks(())
plt.title(titles[i])
plt.show()
def title(target_names, y_true, y_pred, i):
true_name = target_names[y_true[i]]
pred_name = target_names[y_pred[i]]
return '%s\n%s' % (true_name, pred_name)
titles = [title(target_names, y_test, y_pred, i) for i in range(len(x_test))]
gallery(titles, x_test, h, w)
eigenface = gs.best_estimator_.named_steps['pca'].components_
eigentitle = ['eigenface%i' % (i+1) for i in range(len(eigenface))]
gallery(eigentitle, eigenface, h, w)
print(gs.best_estimator_.named_steps['pca'].n_components_)
n = len(gs.best_estimator_.named_steps['pca'].explained_variance_ratio_)
print(n)
plt.figure()
plt.plot(range(1, n+1), gs.best_estimator_.named_steps['pca'].explained_variance_ratio_)
plt.show()
'''
import numpy
import pandas
from sklearn import cluster, covariance
# Maps each stock ticker symbol to the company name shown in the printed
# cluster listing. Entry order is kept as originally written.
symbol_dict = {
    "TOT": "Total",
    "XOM": "Exxon",
    "CVX": "Chevron",
    "COP": "ConocoPhillips",
    "VLO": "Valero Energy",
    "MSFT": "Microsoft",
    "IBM": "IBM",
    "TWX": "Time Warner",
    "CMCSA": "Comcast",
    "CVC": "Cablevision",
    "YHOO": "Yahoo",
    "DELL": "Dell",
    "HPQ": "HP",
    "AMZN": "Amazon",
    "TM": "Toyota",
    "CAJ": "Canon",
    "SNE": "Sony",
    "F": "Ford",
    "HMC": "Honda",
    "NAV": "Navistar",
    "NOC": "Northrop Grumman",
    "BA": "Boeing",
    "KO": "Coca Cola",
    "MMM": "3M",
    "MCD": "McDonald's",
    "PEP": "Pepsi",
    "K": "Kellogg",
    "UN": "Unilever",
    "MAR": "Marriott",
    "PG": "Procter Gamble",
    "CL": "Colgate-Palmolive",
    "GE": "General Electrics",
    "WFC": "Wells Fargo",
    "JPM": "JPMorgan Chase",
    "AIG": "AIG",
    "AXP": "American express",
    "BAC": "Bank of America",
    "GS": "Goldman Sachs",
    "AAPL": "Apple",
    "SAP": "SAP",
    "CSCO": "Cisco",
    "TXN": "Texas Instruments",
    "XRX": "Xerox",
    "WMT": "Wal-Mart",
    "HD": "Home Depot",
    "GSK": "GlaxoSmithKline",
    "PFE": "Pfizer",
    "SNY": "Sanofi-Aventis",
    "NVS": "Novartis",
    "KMB": "Kimberly-Clark",
    "R": "Ryder",
    "GD": "General Dynamics",
    "RTN": "Raytheon",
    "CVS": "CVS",
    "CAT": "Caterpillar",
    "DD": "DuPont de Nemours",
}
# Build parallel arrays of ticker symbols and company names. sorted() orders
# the (symbol, name) pairs by symbol; transposing the resulting two-column
# string array yields one row of symbols and one row of names.
symbol_list, name_list = numpy.array(sorted(symbol_dict.items())).T

# Load one quote table per ticker from data/<SYMBOL>.csv. Each file must
# provide 'open' and 'close' columns, which are read just below.
quote_list = [
    pandas.read_csv('data/{}.csv'.format(symbol)) for symbol in symbol_list
]

# Stack the per-ticker columns into 2-D arrays with one row per ticker.
# numpy.vstack requires every CSV to contribute the same number of rows.
close_price_list = numpy.vstack([q['close'] for q in quote_list])
open_price_list = numpy.vstack([q['open'] for q in quote_list])

# The feature fed to the model is the close-minus-open variation.
variation_list = close_price_list - open_price_list

# Transpose so each column holds one ticker's series, then scale every column
# to unit standard deviation.
x = variation_list.T
# NOTE: in-place division on a transposed view -- `variation_list` shares its
# memory with `x`, so it ends up rescaled as well.
x /= x.std(0)

# Fit a cross-validated graphical-lasso covariance estimator.
# GraphicalLassoCV is the current name of the estimator formerly exposed as
# GraphLassoCV (renamed in scikit-learn 0.20; the old alias was later removed).
edge_model = covariance.GraphicalLassoCV(cv=5)
edge_model.fit(x)

# Cluster the tickers with affinity propagation on the estimated covariance.
# Only the per-ticker labels are used; the first return value is discarded.
_, label_list = cluster.affinity_propagation(edge_model.covariance_)

# Print the company names of each cluster, numbering clusters from 1.
for i in range(label_list.max() + 1):
    print('cluster%i %s' % (i + 1, ' '.join(name_list[label_list == i])))
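# Copyright notice: this is an original article by baidu_25845567, released under the CC 4.0 BY-SA license. When reposting, include a link to the original source and this notice.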