In [1]:
import pandas as pd
import numpy as np
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
# Toy frame: two categories, each with random values and random weights.
# A locally seeded Generator makes the example reproducible across kernel
# restarts without touching NumPy's global random state.
rng = np.random.default_rng(42)
df = DataFrame({'category': ['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b'],
                'data': rng.standard_normal(8),
                'weights': rng.random(8)})
df
Out[2]:
In [3]:
def get_wavg(g):
    """Return the average of ``g['data']`` weighted by ``g['weights']``."""
    return np.average(g['data'], weights=g['weights'])


# One weighted average per category.
grouped = df.groupby('category')
grouped.apply(get_wavg)
Out[3]:
In [4]:
# IPython help query: the trailing `?` shows the docstring for np.average.
np.average?
In [7]:
# Load the price table; the first CSV column becomes the index and is parsed as dates.
close_px = pd.read_csv(
    'ch09/stock_px.csv',
    index_col=0,
    parse_dates=True,
)
# Peek at the last four rows only.
close_px.tail(4)
Out[7]:
In [8]:
def spx_corr(x):
    """Correlate every column of ``x`` with its 'SPX' column."""
    return x.corrwith(x['SPX'])


# Period-over-period percentage changes, with rows containing NaN dropped.
rets = close_px.pct_change().dropna()
# Group rows by the year of their index label.
by_year = rets.groupby(lambda ts: ts.year)
by_year.apply(spx_corr)
Out[8]:
In [13]:
# Yearly correlation coefficient between Apple (AAPL) and Microsoft (MSFT)
by_year.apply(
    lambda year_rets: year_rets['AAPL'].corr(year_rets['MSFT'])
)
Out[13]:
In [12]:
# IPython help query: the trailing `?` shows the docstring for DataFrame.corrwith.
DataFrame.corrwith?
In [17]:
import statsmodels.api as sm
def regress(data, yvar, xvars):
    """Fit an OLS regression of ``yvar`` on ``xvars`` plus an intercept.

    Parameters
    ----------
    data : DataFrame
        Observations; must contain the ``yvar`` and ``xvars`` columns.
    yvar : str
        Name of the dependent-variable column.
    xvars : str or sequence of str
        Name(s) of the explanatory column(s). A single name is accepted
        and treated as a one-element list.

    Returns
    -------
    The ``params`` attribute of the fitted ``sm.OLS`` result. The design
    matrix holds the explanatory columns followed by an 'intercept' column.
    """
    if isinstance(xvars, str):
        # A bare string would select a Series and break the design matrix.
        xvars = [xvars]
    Y = data[yvar]
    # assign() builds a new frame, so the constant column is never written
    # onto a selection of ``data`` (no SettingWithCopyWarning, and the
    # caller's frame is left untouched).
    X = data[list(xvars)].assign(intercept=1.)
    # Ordinary Least Squares (OLS)
    result = sm.OLS(Y, X).fit()
    return result.params
In [18]:
# Per-year regression of AAPL on SPX; extra keyword arguments are forwarded to regress.
by_year.apply(regress, yvar='AAPL', xvars=['SPX'])
Out[18]:
In [22]:
# Same regression over the whole sample instead of per year.
regress(rets, 'AAPL', ['SPX'])
Out[22]:
In [23]:
# IPython help query: the trailing `?` shows the docstring for sm.OLS.
sm.OLS?
In [25]:
# Minimal OLS example on an exactly linear relationship (each Y equals X - 1).
Y = list(range(1, 9))
# add_constant adds a constant column to the regressors for the intercept term.
X = sm.add_constant(list(range(2, 10)))
model = sm.OLS(Y, X)
results = model.fit()
results.params
Out[25]:
In [ ]: