In [1]:
from pandas_datareader import data, wb
In [2]:
# Accumulator shared by the download loop below:
# ticker symbol -> {'source': fetch method name, 'data': price DataFrame}.
tickers_data = {}
In [3]:
from tqdm import tqdm

# Merval (Buenos Aires) ticker symbols to download.
tickers = ['APBR', 'PAMP', 'YPFD', 'GGAL', 'ERAR', 'CRES', 'COME', 'ALUA', 'FRAN', 'MIRG',
           'BMA', 'TRAN', 'TS', 'JMIN', 'EDN', 'TGSU2', 'SAMI', 'AGRO', 'TECO2', 'PESA',
           'CEPU', 'CTIO', 'CECO2', 'AUSO', 'PETR', 'CELU', 'TGNO4']

for ticker in tqdm(tickers):
    # Skip tickers already fetched so the cell is safe to re-run after failures.
    if ticker in tickers_data:
        continue
    # Try Google first, then fall back to Yahoo.
    for method in ('get_data_google', 'get_data_yahoo'):
        try:
            tickers_data[ticker] = {
                'source': method,
                'data': getattr(data, method)(ticker)
            }
            # BUGFIX: stop at the first source that works.  Without this break
            # a successful Google fetch was overwritten by the Yahoo fallback
            # (and 'source' mislabeled) whenever both sources responded.
            break
        except Exception:
            # Best-effort: a source may be down or lack the symbol; try the next.
            continue
In [4]:
import pickle

# Persist the downloaded data so later sessions can skip the network fetch.
# BUGFIX: pickle protocol 2 is a binary format, so the file must be opened in
# binary mode ('wb').  Text mode ('w') corrupts the stream on Windows under
# Python 2 and raises a TypeError outright on Python 3.
with open('tickers_data.pkl', 'wb') as f:
    pickle.dump(tickers_data, f, 2)
In [5]:
# nbagg backend for interactive figures inside the notebook.
# NOTE(review): %pylab star-imports numpy and matplotlib.pyplot into the global
# namespace (figure, plot, title, choice, np, ...) — later cells rely on those
# names, so replacing it with explicit imports would require touching them all.
%matplotlib nbagg
%pylab
In [98]:
from statsmodels.tsa.filters.hp_filter import hpfilter

# For each downloaded ticker: holding-period return over 2017 plus an
# HP-smoothed version of the close series (for less noisy correlations).
returns = []
# BUGFIX: .items() instead of .iteritems() — iteritems() is Python 2 only,
# while items() behaves the same here on both Python 2 and 3.
for ticker, ticker_data in tickers_data.items():
    close = ticker_data['data']['2017-01-01':].Close
    if len(close) == 0:
        # No rows in the window (failed download or stale series) — skip.
        continue
    returns.append(
        {
            # Simple holding-period return over the window.
            'return': (close.iloc[-1] - close.iloc[0]) / close.iloc[0],
            # hpfilter returns (cycle, trend); keep the smoothed trend.
            # lamb=50 — smoothing strength, presumably tuned by eye for daily
            # data; TODO confirm against the standard daily-frequency choice.
            'close': hpfilter(close, 50)[1],
            'close_orig': close,
            'ticker': ticker
        }
    )
In [99]:
# Eyeball the HP filter: plot one randomly chosen ticker's smoothed close
# on top of the original close series.
figure()
picked = choice(returns)
title(picked['ticker'])
picked['close'].plot()
picked['close_orig'].plot()
Out[99]:
In [100]:
def corr(s1, s2, start):
    """Pearson correlation of the day-over-day changes of two price series.

    Both series are restricted to dates >= `start` and normalized to their
    first value; correlating the first differences (rather than the levels)
    avoids the spurious correlation of two series that merely trend together.
    """
    window1 = s1[start:]
    window2 = s2[start:]
    normalized1 = window1 / window1.iloc[0]
    normalized2 = window2 / window2.iloc[0]
    # Levels-based alternative kept for reference: normalized1.corr(normalized2)
    return normalized1.diff().corr(normalized2.diff())
# All ordered ticker pairs — including self-pairs and both (a, b) / (b, a)
# orientations, because the matrix-building cell below looks each pair up
# in a single direction.
corrs = [
    {
        't1': left['ticker'],
        't2': right['ticker'],
        'corr': corr(left['close'], right['close'], '2016-01-01')
    }
    for left in returns
    for right in returns
]
In [101]:
import pandas as pd
# NOTE(review): this rebinds `returns` from the list of dicts built above to a
# DataFrame — hidden-state hazard: the correlation loop (cell 100) must run
# BEFORE this cell, and downstream cells expect the DataFrame form.  A distinct
# name (e.g. returns_df) would be safer, but later cells reference `returns`.
returns = pd.DataFrame(returns)
# Rank tickers by holding-period return, best first (last expression displays).
returns.sort_values('return', ascending=False)[['ticker', 'return']]
Out[101]:
In [102]:
# Same name-reuse pattern as the previous cell: `corrs` list -> DataFrame,
# sorted by correlation descending.
corrs = pd.DataFrame(corrs)
corrs = corrs.sort_values('corr', ascending=False)
# Tickers most correlated with PAMP, shown next to their own 2017 return
# (relies on `returns` already being the DataFrame built in the previous cell).
corrs[corrs.t1 == 'PAMP'].merge(returns, left_on='t2', right_on='ticker')[['t2', 'corr', 'return']]
Out[102]:
In [104]:
from sklearn.cluster import k_means
# Elbow plot: k-means inertia for k = 2..9 on the correlation matrix M.
# NOTE(review): M is built in a LATER cell (In [112]) — this notebook only
# works when executed out of order; a fresh Restart-&-Run-All fails here.
figure()
# k_means returns (centroids, labels, inertia); [-1] picks the inertia.
plot(range(2, 10), [k_means(M, i)[-1] for i in range(2, 10)], '-o')
Out[104]:
In [105]:
from sklearn.cluster import k_means
# Map ticker name -> k-means cluster id, k=5 presumably chosen from the elbow
# plot above — TODO confirm.  k_means returns (centroids, labels, inertia);
# [1] picks the per-row labels.
# NOTE(review): `names` and `M` are defined in the NEXT cell (In [112]) —
# another out-of-order dependency; this cell fails on a fresh top-to-bottom run.
clusters = dict(zip(names, k_means(M, 5)[1]))
In [112]:
from collections import defaultdict

# Order tickers by their total correlation against all other tickers, so the
# heatmap below groups strongly co-moving names together.
names = sorted(corrs.t1.unique(), key=lambda x: corrs['corr'][corrs.t1 == x].sum())

# If k-means clusters are available (cell In [105]), refine the ordering so
# that members of the same cluster end up adjacent in the matrix.
# BUGFIX: the guard used to be `if 'avg_cluster_distance' in globals()`, but
# avg_cluster_distance is only ever defined INSIDE this branch, so the branch
# could never execute — the cluster-aware reordering was dead code.  The real
# data dependency is `clusters`, so that is what is checked now.
if 'clusters' in globals():
    avg_cluster_distance = defaultdict(list)
    # .items() instead of the Python-2-only .iteritems().
    for name, cl_id in clusters.items():
        avg_cluster_distance[cl_id].append(corrs['corr'][corrs.t1 == name].sum())
    # Collapse each cluster's member sums to its mean, then sort tickers by
    # their cluster's mean so whole clusters move together.
    avg_cluster_distance = {cl_id: np.mean(v) for cl_id, v in avg_cluster_distance.items()}
    names = sorted(names, key=lambda x: avg_cluster_distance[clusters[x]])

# Dense pairwise-correlation matrix in the chosen ticker order.
M = []
for name1 in names:
    row = []
    for name2 in names:
        if name1 == name2:
            # Zero the diagonal so self-correlation (1.0) doesn't dominate
            # the colormap scale.
            row.append(0)
        else:
            c = corrs[(corrs.t1 == name1) & (corrs.t2 == name2)]
            row.append(c['corr'].iloc[0])
    M.append(row)
In [111]:
figure()
# Heatmap of the pairwise correlation matrix; rows/columns follow the
# cluster-aware ordering built above, so correlated groups appear as blocks.
imshow(M, interpolation='nearest', origin='lower')
xticks(range(len(names)), names, rotation=45)
yticks(range(len(names)), names, rotation=45)
colorbar()
title('Correlacion stocks del Merval')
Out[111]:
In [109]:
import networkx as nx

# Build an undirected graph with one weighted edge per positively-correlated
# ticker pair (weight = correlation rounded to 2 decimals for readability).
edges = []
for i, row in corrs.iterrows():
    # BUGFIX: skip self-pairs — every ticker correlates 1.0 with itself, and
    # the resulting self-loops carried no information and cluttered the
    # exported graph.
    if row.t1 == row.t2:
        continue
    if row['corr'] > 0:
        edges.append((row.t1, row.t2, round(float(row['corr']), 2)))
g = nx.Graph()
# The graph is undirected, so the (a, b) / (b, a) duplicates from the full
# cross product collapse into a single edge.
g.add_weighted_edges_from(edges)
In [110]:
# Export the correlation graph for external visualization (e.g. Gephi).
nx.write_graphml(g, 'g.graphml')
In [95]:
# Leftover debugging: shows the working directory (where g.graphml was written).
!pwd
In [114]:
figure()
# Spot check: FRAN close price since 2016 (ticker hardcoded for inspection).
tickers_data['FRAN']['data']['2016-01-01':].Close.plot()
Out[114]: