In [61]:
import pandas as pd
import dateutil
import numpy as np
import matplotlib.pyplot as plt
from models import sync, Tweets
%matplotlib inline
In [62]:
from matplotlib import rc
# Global matplotlib text settings: render all figure text through LaTeX with
# Helvetica as the sans-serif (and serif fallback) face and Inconsolata for
# monospace.
rc('font', **{
    'family': 'sans-serif',
    'sans-serif': ['Helvetica'],
    'monospace': ['Inconsolata'],
    'serif': ['Helvetica'],
})
rc('text', **{'usetex': True})
# Raw string is required here: in a normal literal '\u' begins a unicode
# escape, which makes this cell a SyntaxError on Python 3. The sfmath LaTeX
# package is loaded so math text is set in sans-serif like the labels.
rc('text', **{'latex.preamble': r'\usepackage{sfmath}'})
In [58]:
# Get Data from SQLAlchemy query: one row per tweet (id, timestamp, coordinates)
session = sync()
q = session.query(Tweets.id, Tweets.timestamp, Tweets.coordinates)

# Name the columns up front instead of relying on positional labels 0/1/2:
# result rows can be named-tuple-like, in which case pandas may take the
# column names from the row fields and the positional lookups would fail.
# The id and coordinates columns are named after what their per-day counts
# represent in the final frame.
df = pd.DataFrame(
    [tuple(row) for row in q],
    columns=['Total Tweets', 'timestamp', 'Geotagged Tweets'])

# Index by timestamp so the frame can be resampled by calendar day.
# NOTE(review): assumes Tweets.timestamp values are datetimes (resample needs
# a DatetimeIndex) -- confirm against the model definition.
df = df.set_index('timestamp')

# Normalise missing coordinates to NaN so they are excluded from the count.
df = df.replace({'Geotagged Tweets': None}, np.nan)

# count non-empty columns per day, leaving only id and coordinate counts.
# `resample(..., how='count')` was deprecated and later removed from pandas;
# the method-chained form below is the supported spelling (pandas >= 0.18).
df = df.resample('D').count()
In [68]:
# Stacked daily bar chart of the two per-day count columns in `df`.
# No plt.clf() beforehand: a new figure is created explicitly right here, so
# clearing the "current" figure first only spawned a stray empty figure.
fig, ax = plt.subplots(figsize=(7.28, 5.25))

# Transparent axes background. Set on the patch rather than through the
# `axisbg=` keyword, which has been removed from matplotlib.
ax.patch.set_facecolor('none')

total_tweets = df['Total Tweets'].sum()

# NOTE(review): with stacked bars the geotagged count is drawn on top of the
# total count, so bar height is total + geotagged -- confirm this is intended.
df.plot(
    ax=ax,
    kind="bar",
    # boolean, not the string "true" (which only worked by being truthy)
    stacked=True,
    title="Tweets from @brockleycentral's followers (Total Tweets: {0:,})".format(total_tweets),
    # I happen to like teal and orange
    color=["#009aaf", "#ff8300"],
    lw=.5,
    ec='#000000')

# Semi-transparent legend box so bars behind it remain visible.
leg = ax.legend(loc='best', title="Daily Tweet Count")
leg.get_frame().set_alpha(0.4)

# default formatting shows hours and gets cut off
ax.set_xticklabels(
    [dt.strftime("%d-%m-%Y") for dt in df.index.to_pydatetime()])
ax.set_ylabel("Number of Tweets")
ax.set_xlabel("Date")

# Passing None with no other arguments toggles grid visibility. It is passed
# positionally because the keyword was renamed from `b` to `visible` in newer
# matplotlib releases.
ax.grid(None)
plt.tight_layout()
# plt.savefig("tweets.png", format="png", bbox_inches='tight', transparent=True, dpi=100)
plt.show()
In [ ]: