In [1]:
import quandl
import pandas as pd
import pickle
import matplotlib.pyplot as plt
from matplotlib import style
style.use("ggplot")
In [2]:
api_key = open("quandlapikey.txt", "r").read()

def state_list():
    fiddy_states = pd.read_html("https://simple.wikipedia.org/wiki/List_of_U.S._states")
    return fiddy_states[0][0][1:]

def grab_initial_state_data_start_pct():
    states = state_list()
    main_df = pd.DataFrame()
    for ab in states:
        query = "FMAC/HPI_" + ab
        df = quandl.get(query, authtoken=api_key)
        df.columns = [ab]
        df[ab] = (df[ab] - df[ab][0]) / df[ab][0] * 100.0  # <------- percent change from the first value
        if main_df.empty:
            main_df = df
        else:
            main_df = main_df.join(df)
    pickle_out = open("./data/fiddy_states.pickle", "wb")
    pickle.dump(main_df, pickle_out)
    pickle_out.close()

def HPI_Benchmark():
    df = quandl.get("FMAC/HPI_USA", authtoken=api_key)
    df.columns = ["US"]
    df["US"] = (df["US"] - df["US"][0]) / df["US"][0] * 100.0  # <------- percent change from the first value
    return df
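# The "<-------" lines above rescale each series to percent change from its first
# value. A minimal sketch on a made-up series (hypothetical numbers, no Quandl
# key needed) to show what that expression produces:
s = pd.Series([100.0, 110.0, 121.0])
print((s - s[0]) / s[0] * 100.0)  # 0.0, 10.0, 21.0 -> percent change vs. the first entry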
In [3]:
#grab_initial_state_data_start_pct()
HPI_data = pd.read_pickle("./data/fiddy_states.pickle")
HPI_data["TX1yr"] = HPI_data["TX"].resample("A").mean()  # annual mean, indexed only at year-end dates
print(HPI_data[["TX", "TX1yr"]])  # TX1yr is NaN on every non-year-end row
#http://pandas.pydata.org/pandas-docs/stable/timeseries.html#timeseries-offset-

fig = plt.figure()
ax1 = plt.subplot2grid((1, 1), (0, 0))
HPI_data[["TX", "TX1yr"]].plot(ax=ax1)
plt.legend(loc=4)
plt.show()
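# Why the NaNs appear: resample("A").mean() returns a series indexed only at
# year-end dates, so assigning it into the monthly frame aligns on the index and
# leaves every other row empty. A toy sketch with made-up monthly data:
idx = pd.date_range("2000-01-31", periods=24, freq="M")
monthly = pd.Series(range(24), index=idx, dtype=float)
annual = monthly.resample("A").mean()             # indexed at 2000-12-31 and 2001-12-31 only
toy = pd.DataFrame({"monthly": monthly, "annual": annual})
print(toy["annual"].notnull().sum())              # 2 -> only the two year-end rows got a value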
In [4]:
HPI_data.dropna(inplace=True)  # drops every row that contains at least one NaN, modifying HPI_data in place
print(HPI_data[["TX", "TX1yr"]])  # no NaNs left
#http://pandas.pydata.org/pandas-docs/stable/timeseries.html#timeseries-offset-

fig = plt.figure()
ax1 = plt.subplot2grid((1, 1), (0, 0))
HPI_data[["TX", "TX1yr"]].plot(ax=ax1)
plt.legend(loc=4)
plt.show()
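# Because TX1yr is NaN on every non-year-end row, this drop also discards most of
# the monthly TX observations. If only specific columns should decide what gets
# dropped, dropna accepts a subset argument; a hedged sketch on made-up data:
toy = pd.DataFrame({"TX": [1.0, 2.0, 3.0],
                    "TX1yr": [float("nan"), float("nan"), 2.0]})
print(toy.dropna())                  # default how="any": only the last row survives
print(toy.dropna(subset=["TX"]))     # judge rows by TX alone: nothing is dropped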
In [5]:
df = pd.DataFrame({"first": [1, float("nan"), float("nan")],
                   "second": [4, 5, float("nan")]})
print(df)
df.dropna(inplace=True, how="all")  # drop only rows where every value is NaN
print(df)
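# For contrast, the default how="any" drops a row as soon as any column is NaN;
# thresh sets a minimum number of non-NaN values instead. A quick comparison on
# the same toy frame:
df = pd.DataFrame({"first": [1, float("nan"), float("nan")],
                   "second": [4, 5, float("nan")]})
print(df.dropna())           # how="any": only the fully populated first row survives
print(df.dropna(thresh=1))   # keep rows with at least one real value (same as how="all" here)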
In [6]:
df = pd.DataFrame({"first":[1, float("nan"), float("nan"), 7],
"second":[4, 5, float("nan"), 8]});
print(df);
df.fillna(inplace = True, method="ffill");
print(df);
In [7]:
df = pd.DataFrame({"first":[1, float("nan"), float("nan"), 7],
"second":[4, 5, float("nan"), 8]});
print(df);
df.fillna(inplace = True, method="bfill");
print(df);
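# Newer pandas releases prefer the dedicated ffill()/bfill() methods over
# fillna(method=...); a minimal equivalent of the two cells above (non-inplace,
# returning a new frame):
df = pd.DataFrame({"first": [1, float("nan"), float("nan"), 7],
                   "second": [4, 5, float("nan"), 8]})
print(df.ffill())  # same result as fillna(method="ffill")
print(df.bfill())  # same result as fillna(method="bfill")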
In [8]:
df = pd.DataFrame({"first":[1, float("nan"), float("nan"), 7],
"second":[4, 5, float("nan"), 8]});
print(df);
df.fillna(value = -1, inplace = True);
print(df);
In [9]:
HPI_data = pd.read_pickle("./data/fiddy_states.pickle")
HPI_data["TX1yr"] = HPI_data["TX"].resample("A").mean()
HPI_data.fillna(inplace=True, method="ffill")  # <------- forward-fill the annual column
fig = plt.figure()
ax1 = plt.subplot2grid((1, 1), (0, 0))
HPI_data[["TX", "TX1yr"]].plot(ax=ax1)
plt.legend(loc=4)
plt.show()

HPI_data = pd.read_pickle("./data/fiddy_states.pickle")
HPI_data["TX1yr"] = HPI_data["TX"].resample("A").mean()
HPI_data.fillna(inplace=True, method="bfill")  # <------- back-fill the annual column
fig = plt.figure()
ax1 = plt.subplot2grid((1, 1), (0, 0))
HPI_data[["TX", "TX1yr"]].plot(ax=ax1)
plt.legend(loc=4)
plt.show()
In [10]:
HPI_data = pd.read_pickle("./data/fiddy_states.pickle")
HPI_data["TX1yr"] = HPI_data["TX"].resample("A").mean()
HPI_data.fillna(inplace=True, value=-99999, limit=10)  # <------- fill at most 10 NaNs per column
print(HPI_data["TX1yr"])
print(HPI_data.isnull().values.sum())  # count of NaN values still left
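# limit=10 caps how many NaNs get replaced in each column, which is why the count
# printed above can still be non-zero. A toy illustration of limit on made-up data:
s = pd.Series([1.0] + [float("nan")] * 5)
print(s.fillna(value=-1, limit=2))  # only the first two NaNs become -1; the rest stay NaN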