In [1]:
import pandas as pd
from sklearn.decomposition import PCA
import numpy as np
In [2]:
# Load the closing-price table from the local resources directory.
close_prices = pd.read_csv(filepath_or_buffer='resources/close_prices.csv')
In [3]:
# Preview the first ten rows of the price table.
close_prices.iloc[:10]
Out[3]:
In [4]:
# Fit a 10-component PCA on every column from 'AXP' through the last one,
# passed to scikit-learn as a bare array; fit() returns the fitted estimator.
pca = PCA(n_components=10).fit(close_prices.loc[:, 'AXP':].values)
# Share of total variance captured by each fitted component.
pca.explained_variance_ratio_
Out[4]:
In [5]:
# Explained-variance ratios ordered from largest to smallest.
np.sort(pca.explained_variance_ratio_)[::-1]
Out[5]:
In [6]:
# Smallest number of principal components whose cumulative explained
# variance reaches the target share.
VARIANCE_TARGET = 0.9

# Accumulate the ratios from largest to smallest, as the original loop did.
ratios_desc = np.sort(pca.explained_variance_ratio_)[::-1]
cumulative = np.cumsum(ratios_desc)
reached = cumulative >= VARIANCE_TARGET

if not reached.any():
    # Previously this case silently reported len(ratios) + 1 components,
    # a number larger than what was actually fitted.
    raise ValueError(
        "The fitted components explain only "
        + str(float(cumulative[-1]) if cumulative.size else 0.0)
        + " of the variance, below the target of "
        + str(VARIANCE_TARGET)
        + "; refit PCA with more components."
    )

# argmax on a boolean array returns the position of the first True.
count = int(np.argmax(reached)) + 1
# Cumulative share at the cut-off; name kept in case other cells read it.
sum1 = float(cumulative[count - 1])
print("Количество признаков: " + str(count))
In [7]:
# Scores of every row on the first principal component (column 0 of the
# transformed data). The estimator was fitted on a bare array (.values), so
# transform() is given the same kind of input; mixing a DataFrame here with an
# array at fit time may trigger scikit-learn's feature-name mismatch warning.
comp1 = pd.DataFrame(pca.transform(close_prices.loc[:, 'AXP':].values))[0]
In [8]:
# Load the index table from the local resources directory.
djia = pd.read_csv(filepath_or_buffer='resources/djia_index.csv')
In [9]:
# Preview the first ten rows of the index table.
djia.iloc[:10]
Out[9]:
In [13]:
# Correlation matrix between the first-component scores and the '^DJI' column.
dji = djia.loc[:, '^DJI']
np.corrcoef(comp1, dji)
Out[13]:
In [23]:
# Find the column with the largest weight in the first principal component.
comp0_w = pd.Series(pca.components_[0])
# Position (within the fitted feature columns) of the largest weight.
comp0_w_top = comp0_w.idxmax()
# Look the name up in the same 'AXP':-onward slice the PCA was fitted on,
# instead of relying on a hard-coded "+ 1" offset into the full table, which
# is only correct when 'AXP' happens to be the second column.
fitted_columns = close_prices.loc[:, 'AXP':].columns
company = fitted_columns[comp0_w_top]
print(company)