In [105]:
%matplotlib inline
import csv
import datetime
from bs4 import BeautifulSoup as bs4
import matplotlib.pyplot as plt
import pandas as pd
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names. Use the new name when this install
# provides it, otherwise fall back to the original name for older installs.
plt.style.use('seaborn-v0_8-colorblind'
              if 'seaborn-v0_8-colorblind' in plt.style.available
              else 'seaborn-colorblind')
In [106]:
# Read the saved history page, parse it with the built-in HTML parser and
# collect every <li> element found in the document.
with open('startbucks-history.html') as history_page:
    markup = history_page.read()
soup = bs4(markup, 'html.parser')
items = soup.find_all('li')
In [107]:
# The date text on each entry is parsed with '%B %d' (month name and day only),
# so the year has to be supplied separately.
HISTORY_YEAR = 2017

# Build one record per history entry from its 'historyItemMessage' elements:
# [0] stars text, [2] location, [3] amount spent, [4] date (when present).
# NOTE(review): the original cell's indentation was lost; this assumes the
# record is appended only for entries that pass the filter below - confirm.
history = []
# Most recently parsed date. Entries without a fifth message element reuse it.
# Initialising it here means a dateless first entry is recorded with
# date=None instead of raising NameError or silently picking up a `date`
# value left in the kernel by an earlier run.
date = None
for item in items:
    messages = item.find_all(class_='historyItemMessage')
    stars_earned = messages[0].text.strip()
    # Entries labelled 'Reload' or ending in 'Stars Redeemed' are left out.
    if stars_earned == 'Reload' or stars_earned.endswith('Stars Redeemed'):
        continue
    stars_earned = stars_earned.replace(' Stars Earned', '')
    location = messages[2].text.strip()
    spent = messages[3].text.strip().replace(' CAD', '')
    if len(messages) > 4:
        date_text = '{} {}'.format(messages[4].text.strip(), HISTORY_YEAR)
        date = datetime.datetime.strptime(date_text, '%B %d %Y').date().isoformat()
    history.append(dict(stars_earned=stars_earned, location=location, spent=spent, date=date))
In [108]:
# Column order of the CSV. These are the keys of every record in `history`;
# listing them explicitly keeps the header stable and lets an empty history
# still produce a valid, header-only file instead of failing on history[0].
header_row = ['stars_earned', 'location', 'spent', 'date']

# newline='' is what the csv module requires for files it writes to; without
# it, platforms that translate '\n' end up with blank rows between records.
# The explicit UTF-8 encoding matches the default pandas.read_csv uses when
# the file is loaded again, regardless of the machine's locale.
with open('startbucks-history.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=header_row)
    writer.writeheader()
    writer.writerows(history)
In [136]:
# Load the exported CSV, using the parsed 'date' column as the index.
df = pd.read_csv('startbucks-history.csv', index_col=['date'], parse_dates=['date'])
# Convert 'spent' to float: strip '$', ',' and ')' and turn '(' into a minus
# sign, so text such as "(1.50)" becomes -1.5. The pattern is a raw string
# because '\$' is not a valid escape in a normal string literal.
df['spent'] = (df['spent'].replace(r'[\$,)]', '', regex=True)
               .replace('[(]', '-', regex=True).astype(float))
# sort_index() returns a new frame rather than sorting in place, so the
# result has to be assigned back for the rows to end up in date order.
df = df.sort_index()
df.head()
In [141]:
# Take the date range for the title from the earliest and latest index values
# so it is correct whatever order the rows of `df` are in.
first_day = df.index.min()
last_day = df.index.max()
# '{0.day} {0:%b}' renders an unpadded day and abbreviated month ("5 Jan").
# It replaces the '%-d' strftime directive, which gives the same text but is a
# platform-specific extension that is not available everywhere.
title = 'Amount spent at Starbucks from {0.day} {0:%b} to {1.day} {1:%b}.'.format(
    first_day, last_day)
plot = df.plot(y=['spent'], legend=False, kind='line', figsize=(10, 5),
               title=title)
# Amounts had their ' CAD' suffix stripped when the history was parsed.
plot.set_ylabel('Amount spent (CAD)')
figure = plot.get_figure()
figure.savefig('figure.png')
In [ ]: