In [1]:
import quandl;
import pandas as pd;
import pickle;
import matplotlib.pyplot as plt;
from matplotlib import style;
style.use("ggplot");
In [2]:
# Read the Quandl API key from a local file so it is never hard-coded in the notebook.
# The context manager closes the file handle deterministically, and strip() drops the
# trailing newline most editors append, which would otherwise be sent as part of the key.
with open("quandlapikey.txt", "r") as key_file:
    api_key = key_file.read().strip()
def state_list():
    """Fetch the per-state identifiers from Wikipedia.

    Parses every HTML table on the Simple English Wikipedia
    "List of U.S. states" page and keeps only the first one.

    Returns:
        pandas.Series: column ``0`` of the first table with its first row
        dropped. Presumably these are the two-letter state abbreviations
        (they are appended to ``"FMAC/HPI_"`` to form Quandl codes) --
        TODO confirm against the live page layout.
    """
    url = "https://simple.wikipedia.org/wiki/List_of_U.S._states"
    tables = pd.read_html(url)
    first_column = tables[0][0]
    # The first row is skipped; it looks like a header cell rather than a state.
    return first_column[1:]
def grap_initial_state_data_start_pct():
    """Build and cache per-state HPI as percent change from each state's first value.

    For every entry returned by ``state_list()`` this downloads the Quandl
    dataset ``FMAC/HPI_<entry>``, renames its single column to that entry,
    rescales the values to percent change relative to the first row, and
    joins the result onto one wide DataFrame (one column per state).
    The combined frame is pickled to ``./data/fiddy_states.pickle``.

    Returns:
        None. The result is only written to disk; load it back with
        ``pd.read_pickle``.
    """
    main_df = pd.DataFrame()
    for ab in state_list():
        query = "FMAC/HPI_" + ab
        df = quandl.get(query, authtoken=api_key)
        df.columns = [ab]
        # Take the first observation by position. Plain ``series[0]`` only works on a
        # date-indexed series through a positional fallback that pandas has deprecated,
        # so ``.iloc[0]`` is used instead. (Assumes the Quandl frame is date-indexed.)
        first_value = df[ab].iloc[0]
        df[ab] = (df[ab] - first_value) / first_value * 100.0
        if main_df.empty:
            main_df = df
        else:
            # Left join: rows follow the index of the first state downloaded.
            main_df = main_df.join(df)
    # The context manager closes the file even if pickling fails part-way.
    with open("./data/fiddy_states.pickle", "wb") as pickle_out:
        pickle.dump(main_df, pickle_out)
def HPI_Benchmark():
    """Return the national HPI series as percent change from its first value.

    Downloads the Quandl dataset ``FMAC/HPI_USA``, renames its single column
    to ``"US"`` and rescales it so the first row is 0 and every later row is
    the percent change relative to that first row.

    Returns:
        pandas.DataFrame: one column named ``"US"``, indexed as returned by
        Quandl.
    """
    df = quandl.get("FMAC/HPI_USA", authtoken=api_key)
    df.columns = ["US"]
    # Take the first observation by position. Plain ``series[0]`` only works on a
    # date-indexed series through a positional fallback that pandas has deprecated,
    # so ``.iloc[0]`` is used instead. (Assumes the Quandl frame is date-indexed.)
    first_value = df["US"].iloc[0]
    df["US"] = (df["US"] - first_value) / first_value * 100.0
    return df
In [3]:
# The per-state frame is cached on disk. Uncomment the next line to (re)build the
# cache from Quandl (one request per state) before loading it.
#grap_initial_state_data_start_pct()
HPI_data = pd.read_pickle("./data/fiddy_states.pickle")

# One line per state. The legend is switched off up front (it would cover the plot)
# rather than being created and then removed.
fig, ax = plt.subplots()
HPI_data.plot(ax=ax, legend=False)
ax.set_title("HPI by state")
# Values are percent change from each state's first observation, as computed in
# grap_initial_state_data_start_pct.
ax.set_ylabel("Change since first observation (%)")
plt.show()
In [21]:
# Downsample the Texas series to one value per year: the mean of each year's rows.
# "A" is the pandas year-end offset alias; the full alias table is at
# http://pandas.pydata.org/pandas-docs/stable/timeseries.html#timeseries-offset-aliases
# NOTE(review): recent pandas releases deprecate "A" in favour of "YE"; "A" is kept
# here so the cell still runs on older versions.
tx_monthly = HPI_data["TX"]
TX_year = tx_monthly.resample("A").mean()
print(TX_year.head())
In [28]:
# Overlay the yearly mean (black) on the original Texas series, sharing one set of axes.
fig, ax1 = plt.subplots()
tx_monthly = HPI_data["TX"]
tx_monthly.plot(ax=ax1, label="Monthly Texas HPI")
TX_year.plot(ax=ax1, color="k", label="Yearly Texas HPI")
ax1.legend(loc=4)  # loc=4 places the legend in the lower-right corner
plt.show()
In [34]:
# Yearly open/high/low/close summary of the Texas series: for each year, the first,
# largest, smallest and last value in that year.
tx_monthly = HPI_data["TX"]
TX_year_ohlc = tx_monthly.resample("A").ohlc()
print(TX_year_ohlc.head())

# Draw the four OHLC columns on top of the original series for comparison.
fig, ax1 = plt.subplots()
tx_monthly.plot(ax=ax1, label="Monthly Texas HPI")
TX_year_ohlc.plot(ax=ax1)
ax1.legend(loc=2)  # loc=2 places the legend in the upper-left corner
plt.show()
In [ ]: