In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
from matplotlib import rc
# Global matplotlib text configuration: select the sans-serif family, map the
# sans-serif and serif families to Helvetica and monospace to Inconsolata.
rc('font', **{
    'family': 'sans-serif',
    'sans-serif': ['Helvetica'],
    'monospace': ['Inconsolata'],
    'serif': ['Helvetica'],
})
# Render all text through LaTeX and load the sfmath package in its preamble.
# The preamble has to be a raw string: in an ordinary Python 3 string literal
# "\u" begins a unicode escape, so '\usepackage' is rejected as a SyntaxError.
rc('text', **{
    'usetex': True,
    'latex.preamble': r'\usepackage{sfmath}',
})
In [7]:
# Load the tweet data from CSV, indexed by its parsed timestamp column.
# The list form of parse_dates combined with index_col replaces the dict form
# ({'Timestamp': ['timestamp']}), which newer pandas releases deprecate;
# rename_axis keeps the index name 'Timestamp' that the dict form produced.
data = pd.read_csv(
    'data.csv',
    parse_dates=['timestamp'],
    index_col='timestamp',
).rename_axis('Timestamp')

# Count non-empty values per column for each calendar day. The aggregation is
# a method on the resampler because resample(..., how='count') is no longer
# accepted by pandas. The first three columns are then discarded so that only
# the id and coordinate counts remain.
# NOTE(review): assumes 'tweet_id' and 'coordinates' are the columns from
# position 3 onwards -- confirm against the layout of data.csv.
ts = (
    data
    .resample('D')
    .count()
    .iloc[:, 3:]
    .rename(columns={
        'tweet_id': 'Total Tweets',
        'coordinates': 'Geotagged Tweets',
    })
)
In [8]:
# Plot the daily counts as a stacked bar chart and save it to example.png.
# No plt.clf() here: a new figure is created right below, and calling clf()
# without a current figure only creates a stray empty one.
fig = plt.figure(figsize=(7.28, 5.25))
# 'facecolor' replaces the 'axisbg' keyword, which matplotlib has removed.
ax = fig.add_subplot(111, facecolor='none')
ts.plot(
    ax=ax,
    kind="bar",
    # a real boolean; the string "true" only worked because it is truthy
    stacked=True,
    title="Tweets from @brockleycentral's followers (Total Tweets: {0:,})".format(len(data)),
    # I happen to like teal and orange
    color=["#ff8300", "#009aaf"],
    lw=.5,
    ec='#000000')

# Semi-transparent legend box.
leg = ax.legend(loc='best', title="Daily Tweet Count")
leg.get_frame().set_alpha(0.4)

# default formatting shows hours and gets cut off
ax.set_xticklabels(ts.index.strftime("%d-%m-%Y"))
ax.set_ylabel("Number of Tweets")
ax.set_xlabel("Date")

# Called with no arguments, grid() toggles the grid's visibility, exactly as
# the former grid(b=None) call did; the 'b' keyword itself has been removed
# from matplotlib.
# NOTE(review): if the intent was to force the grid off, use ax.grid(False).
ax.grid()

fig.tight_layout()
fig.savefig("example.png", format="png", bbox_inches='tight', transparent=True, dpi=100)
plt.show()
In [ ]: