In [61]:
import pandas as pd
import dateutil
import numpy as np
import matplotlib.pyplot as plt
from models import sync, Tweets
%matplotlib inline

In [62]:
from matplotlib import rc

# Typeface selection for every font family matplotlib may ask for.
rc('font', **{
    'family': 'sans-serif',
    'sans-serif': ['Helvetica'],
    'monospace': ['Inconsolata'],
    'serif': ['Helvetica'],
})
# Typeset all figure text with LaTeX and load sfmath (sans-serif maths).
# The preamble must be a raw string: in a normal literal '\u' starts a
# unicode escape, which is a SyntaxError on Python 3.
rc('text', **{
    'usetex': True,
    'latex.preamble': r'\usepackage{sfmath}',
})

In [58]:
# Get Data from SQLAlchemy query
session = sync()
q = session.query(Tweets.id, Tweets.timestamp, Tweets.coordinates)

# One record per tweet. Rows are converted to plain tuples and the columns are
# named explicitly (in query order) so the frame layout does not depend on how
# pandas infers labels from the row type.
tweets = pd.DataFrame(
    [tuple(row) for row in q],
    columns=['Total Tweets', 'timestamp', 'Geotagged Tweets'])

# resample() needs a DatetimeIndex; coercing here also keeps an empty result
# set from failing further down.
tweets['timestamp'] = pd.to_datetime(tweets['timestamp'])

# Normalise missing coordinates (None) to NaN so that count() skips them.
coordinates = tweets['Geotagged Tweets']
tweets['Geotagged Tweets'] = coordinates.where(coordinates.notnull())

# Count non-empty cells per calendar day: the id count is the daily total and
# the coordinate count is the number of geotagged tweets.
# (`resample('D', how='count')` relied on the `how=` keyword, which later
# pandas releases removed.)
df = tweets.set_index('timestamp').resample('D').count()

In [68]:
# Stacked bar chart of the daily counts held in `df` (one bar per day).
# A fresh figure is created on every run, so no plt.clf() is needed; calling it
# first only spawned an extra empty figure when none existed yet.
fig, ax = plt.subplots(figsize=(7.28, 5.25))
# Transparent axes background. Setting it on the patch avoids the
# `axisbg=` keyword, which newer Matplotlib releases no longer accept.
ax.patch.set_facecolor('none')

total_tweets = int(df['Total Tweets'].sum())
df.plot(
    ax=ax,
    kind="bar",
    # a real boolean: the former string "true" was only truthy by accident
    stacked=True,
    title="Tweets from @brockleycentral's followers (Total Tweets: {0:,})".format(total_tweets),
    # I happen to like teal and orange
    color=["#009aaf", "#ff8300"],
    lw=.5,
    ec='#000000')

legend = ax.legend(loc='best', title="Daily Tweet Count")
legend.get_frame().set_alpha(0.4)

# default formatting shows hours and gets cut off
ax.set_xticklabels(df.index.strftime("%d-%m-%Y"))
ax.set_ylabel("Number of Tweets")
ax.set_xlabel("Date")
# None toggles the grid's visibility, exactly as before. It is passed
# positionally because the keyword was `b` on older Matplotlib and is
# `visible` on newer releases.
ax.grid(None)
plt.tight_layout()
# plt.savefig("tweets.png", format="png", bbox_inches='tight', transparent=True, dpi=100)
plt.show()


<matplotlib.figure.Figure at 0x10e5bd090>

In [ ]: