In [66]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
In [67]:
def process_year(year):
    """Load the first HTML table in ``./data/<year>.html`` as a float DataFrame.

    The table is read with row 1 as the header and the ``Category`` column as
    the index. The ``Total`` and ``Average`` columns are dropped and the month
    columns are renamed from ``January`` .. ``December`` to ``<year>-01`` ..
    ``<year>-12``. The first remaining column is then discarded, ``$`` and
    ``,`` are stripped from every cell, and the values are cast to float.

    Parameters
    ----------
    year : int or str
        Year used both to locate the file and to build the column labels.

    Returns
    -------
    pandas.DataFrame
        Float-valued frame indexed by category (index labels coerced to str).
    """
    # Spelled out explicitly rather than taken from `calendar.month_name`,
    # which is locale-dependent and could stop matching the table headers.
    month_names = (
        'January', 'February', 'March', 'April', 'May', 'June',
        'July', 'August', 'September', 'October', 'November', 'December',
    )
    month_labels = {
        name: '{}-{:02d}'.format(year, number)
        for number, name in enumerate(month_names, start=1)
    }

    df = pd.read_html('./data/{}.html'.format(year),
                      header=1,
                      index_col=['Category'])[0]
    df = df.drop(['Total', 'Average'], axis=1)
    df = df.rename(index=str, columns=month_labels)

    # NOTE(review): the first remaining column is dropped unconditionally;
    # presumably it is a non-data column in the exported table - confirm
    # against the HTML files, otherwise this silently discards a month.
    # Raw string: `\$` is not a valid escape in a normal string literal.
    df = df[df.columns[1:]].replace(r'[\$,]', '', regex=True).astype(float)
    return df
In [68]:
# Try the loader on a single year and preview the first rows before
# processing the whole range.
df = process_year(year=2017)
df.head(n=5)
Out[68]:
In [69]:
# One frame per year, 2009 through 2017 (the `range` stop, 2018, is exclusive).
# A comprehension builds the list in one step and keeps the loop variable out
# of the notebook namespace.
dfs = [process_year(year) for year in range(2009, 2018)]
len(dfs)
Out[69]:
In [70]:
# Join the per-year frames side by side, aligned on their shared index.
df = pd.concat(dfs, axis='columns')
df.head(n=5)
Out[70]:
In [92]:
print(plt.style.available)
# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names. Prefer the new name when it exists;
# otherwise use the legacy one, so a missing style still raises the same error
# from `plt.style.use` as before.
talk_style = next(
    (name for name in ('seaborn-v0_8-talk', 'seaborn-talk')
     if name in plt.style.available),
    'seaborn-talk',
)
plt.style.use(talk_style)
In [96]:
# Transpose so each original column becomes a point on the x-axis and each
# original row becomes one area series.
df.T.plot.area()
Out[96]:
In [ ]: