In [91]:
# Load the libraries
import numpy as np
import pandas as pd
from scipy import stats
from sklearn import linear_model
In [30]:
# Load the raw price data. `parse_dates=[-1]` asks pandas to parse the last
# column as datetimes (presumably the `date` column used below -- confirm).
df = pd.read_csv("data/Weed_Price.csv", parse_dates=[-1])
# `DataFrame.sort(columns=...)` has been removed from pandas; `sort_values`
# performs the same State-then-date ordering.
df = df.sort_values(by=['State', 'date'])

# California rows only, indexed by date.
df1 = df[df.State == "California"].set_index("date")
print(df1.shape)

# Reindex onto every calendar day between the first and last observation and
# forward-fill, so days absent from the source carry the previous day's values.
idx = pd.date_range(df1.index.min(), df1.index.max())
df1 = df1.reindex(idx).ffill()
print(df1.shape)
In [10]:
# Preview the first rows of the daily, forward-filled California frame.
df1.head()
Out[10]:
In [12]:
# Read the state-level demographics table.
# `pd.read_csv` is used (as for the other data files in this notebook) because
# `pd.DataFrame.from_csv` is deprecated and no longer exists in current pandas.
# `index_col=False` keeps the first column as data instead of using it as the index.
demographics = pd.read_csv("data/Demographics_State.csv", header=0, index_col=False, sep=',')
In [17]:
# Rename `region` to `State` so the column name matches the price data.
demographics = demographics.rename(columns={'region': 'State'})
demographics.head()
Out[17]:
In [18]:
# Lower-case the state names in the price data
# (presumably to match the casing used in `demographics` -- confirm).
df = df.assign(State=df['State'].str.lower())
df.head()
Out[18]:
In [20]:
# Inner join of prices and demographics on `State`: only states present in
# both frames are kept.
df_demo = df.merge(demographics, on="State", how="inner")
df_demo.head()
Out[20]:
In [25]:
# Pearson correlation between per-capita income and HighQ price over all rows
# of the merged frame. `stats.pearsonr` returns the coefficient first, so
# index 0 keeps the coefficient only.
corr_bw_percapita_highq = stats.pearsonr(df_demo['per_capita_income'], df_demo['HighQ'])[0]
# Parenthesised form prints the same text on Python 2 and Python 3.
print(corr_bw_percapita_highq)
Exercise: Find the correlation between percent_white and HighQ
In [ ]:
Impact of de-regulation
In [27]:
# Load the state location table and preview its first rows.
state_location = pd.read_csv("data/State_Location.csv")
state_location.head()
Out[27]:
In [28]:
# Distinct values found in the `status` column.
state_location['status'].unique()
Out[28]:
Exercise: Find the mean price of HighQ weed for states where it is legal and for states where it is illegal
In [ ]:
Finding a good time of the week to buy weed in California
In [45]:
# Derive calendar features from the `date` column. The DatetimeIndex is built
# once and each feature is read from it.
date_index = pd.DatetimeIndex(df['date'])
df['year'] = date_index.year
df['month'] = date_index.month
df['week'] = date_index.week
df['weekday'] = date_index.weekday
In [42]:
# California rows of the merged frame (state names are lower-case by this
# point), taken as an independent copy so new columns can be added safely.
df_demo_ca = df_demo[df_demo['State'] == "california"].copy()

# Add the calendar features in a fixed order: year, month, week, weekday.
ca_dates = pd.DatetimeIndex(df_demo_ca['date'])
for feature in ('year', 'month', 'week', 'weekday'):
    df_demo_ca[feature] = getattr(ca_dates, feature)
df_demo_ca.head()
Out[42]:
In [43]:
# Mean HighQ price in California for each day of the week.
df_demo_ca.groupby("weekday")["HighQ"].mean()
Out[43]:
Exercise: If I need to buy weed on a Wednesday, which state should I be in?
In [46]:
# Mean HighQ price for every (State, weekday) pair.
df.groupby(["State", "weekday"])["HighQ"].mean()
Out[46]:
In [47]:
# Keep the per-(State, weekday) mean HighQ price; the result is a Series with
# a two-level (State, weekday) index.
df_st_wk = df.groupby(["State", "weekday"])["HighQ"].mean()
In [53]:
# Flatten the (State, weekday) index into ordinary columns for display.
# The result is not assigned, so `df_st_wk` itself is unchanged.
df_st_wk.reset_index()
Out[53]:
In [54]:
#Answer:
Predicting the price of HighQ weed in CA
In [127]:
# Modelling frame: the California HighQ series on its daily index.
model_data = df1.loc[:, ['HighQ']].copy()

# Extend the daily range 30 days past the last observation. The offset is an
# explicit Timedelta: `Timestamp + 30` only meant "30 days" through the index's
# implicit daily frequency and raises TypeError in current pandas.
idx = pd.date_range(model_data.index.min(),
                    model_data.index.max() + pd.Timedelta(days=30))

# Reindexing onto the longer range appends the future days with HighQ = NaN.
# The earlier reset_index()/set_index("index") round trip only renamed the
# index, and the name does not survive the reindex, so it is dropped.
model_data = model_data.reindex(idx)
model_data.tail(35)
Out[127]:
In [128]:
# Zero-based row counter (0, 1, 2, ...) over the whole extended range.
model_data['IND'] = np.arange(len(model_data))
model_data.tail(35)
Out[128]:
In [129]:
# Squared counter, so the regression can fit a quadratic trend.
model_data['IND_SQ'] = model_data['IND'] ** 2

# Training rows are the first 532 positions (0..531). `.iloc` makes the
# positional slicing explicit; `.ix` picked label or positional semantics
# depending on the index type and has been removed from pandas.
# NOTE(review): 532 is assumed to be the number of observed days -- confirm
# against df1.shape.
x = model_data[["IND", "IND_SQ"]].iloc[:532]
y = model_data["HighQ"].iloc[:532]
In [132]:
# Rows to predict: everything from position 532 onward. `.iloc` replaces the
# removed, ambiguous `.ix` with explicit positional slicing.
x_test = model_data[["IND", "IND_SQ"]].iloc[532:]
In [130]:
# Print both shapes separated by a space. The formatted string gives the same
# text on Python 2 and Python 3 (the bare print statement is Python-2-only).
print("%s %s" % (x.shape, y.shape))
In [131]:
# Linear regression of y on the columns of x, with an intercept term.
ols = linear_model.LinearRegression(fit_intercept=True)
# The fit call is the cell's last expression, so its return value is displayed.
ols.fit(x, y)
Out[131]:
In [133]:
# Predict for the rows held in x_test using the fitted model.
ols_predict = ols.predict(x_test)
In [136]:
# Display the predicted values.
ols_predict
Out[136]:
In [143]:
# Fitted coefficients, one per training column of x.
ols.coef_
Out[143]:
Exercise: Predict the price of MedQ weed in CA
In [ ]: