notebook.community

Edit and run



In [1]:

    
import pandas as pd
import numpy as np
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
%matplotlib inline



In [2]:

    
df = DataFrame({'category': ['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b'],
               'data': np.random.randn(8),
               'weights': np.random.rand(8)})
df



In [3]:

    
grouped = df.groupby('category')
get_wavg = lambda g: np.average(g['data'], weights=g['weights'])
grouped.apply(get_wavg)









    Out[3]:





category
a   -1.340881
b   -0.392401
dtype: float64



In [4]:

    
np.average?



In [7]:

    
close_px = pd.read_csv('ch09/stock_px.csv', parse_dates=True, index_col=0)
close_px[-4:]



In [8]:

    
rets = close_px.pct_change().dropna()
spx_corr = lambda x: x.corrwith(x['SPX'])
by_year = rets.groupby(lambda x: x.year)
by_year.apply(spx_corr)



In [13]:

    
# 苹果和微软的年度相关系数
by_year.apply(lambda g: g['AAPL'].corr(g['MSFT']))









    Out[13]:





2003    0.480868
2004    0.259024
2005    0.300093
2006    0.161735
2007    0.417738
2008    0.611901
2009    0.432738
2010    0.571946
2011    0.581987
dtype: float64



In [12]:

    
DataFrame.corrwith?



In [17]:

    
import statsmodels.api as sm
def regress(data, yvar, xvars):
    Y = data[yvar]
    X = data[xvars]
    X['intercept'] = 1.
    # 最小二乘法（Ordinary Least Squares, OLS）
    result = sm.OLS(Y, X).fit()
    return result.params



In [18]:

    
by_year.apply(regress, 'AAPL', ['SPX'])



In [22]:

    
regress(rets.loc[:, :], 'AAPL', ['SPX'])









    



d:\Anaconda2\lib\site-packages\ipykernel\__main__.py:5: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy






    Out[22]:





SPX          1.025987
intercept    0.001896
dtype: float64



In [23]:

    
sm.OLS?



In [25]:

    
Y = [1,2,3,4,5,6,7,8]
X = [2,3,4,5,6,7,8,9]
X = sm.add_constant(X)
model = sm.OLS(Y,X)
results = model.fit()
results.params









    Out[25]:





array([-1.,  1.])



In [ ]:

	category	data	weights
0	a	1.016068	0.273315
1	a	-1.611363	0.969514
2	a	-2.169703	0.301117
3	a	-1.501790	0.822700
4	b	-1.184955	0.779356
5	b	-1.291867	0.312252
6	b	0.913494	0.686205
7	b	-0.385359	0.344992

	AAPL	MSFT	XOM	SPX
2011-10-11	400.29	27.00	76.27	1195.54
2011-10-12	402.19	26.96	77.16	1207.25
2011-10-13	408.43	27.18	76.37	1203.66
2011-10-14	422.00	27.27	78.11	1224.58

	AAPL	MSFT	XOM	SPX
2003	0.541124	0.745174	0.661265	1.0
2004	0.374283	0.588531	0.557742	1.0
2005	0.467540	0.562374	0.631010	1.0
2006	0.428267	0.406126	0.518514	1.0
2007	0.508118	0.658770	0.786264	1.0
2008	0.681434	0.804626	0.828303	1.0
2009	0.707103	0.654902	0.797921	1.0
2010	0.710105	0.730118	0.839057	1.0
2011	0.691931	0.800996	0.859975	1.0

	SPX	intercept
2003	1.195406	0.000710
2004	1.363463	0.004201
2005	1.766415	0.003246
2006	1.645496	0.000080
2007	1.198761	0.003438
2008	0.968016	-0.001110
2009	0.879103	0.002954
2010	1.052608	0.001261
2011	0.806605	0.001514