In [596]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.ticker import FuncFormatter
%matplotlib inline
In [2]:
# Load the pickled inputs for this notebook in one pass.
# NOTE(review): unpickling runs code embedded in the file, so these paths
# must only ever point at trusted, locally produced artefacts.
donations, us_states, us_counties, population = map(
    pd.read_pickle,
    [
        'out/21/donations.pkl',
        'out/11/states_gps.pkl',
        'out/11/counties_gps.pkl',
        'out/11/indian_population.pkl',
    ],
)
In [3]:
# Codes removed from both frames: Alaska, Hawaii and the US territories
# (Puerto Rico, Guam, Virgin Islands, Northern Mariana Islands, American
# Samoa), i.e. everything outside the contiguous United States.
excluded_states = ['AK', 'HI', 'PR', 'GU', 'VI', 'MP', 'AS']

# Filter with a boolean mask instead of drop(<index labels of matching rows>):
# dropping by label also removes non-matching rows that happen to share an
# index label whenever the index is not unique. `.copy()` keeps the result an
# independent frame, as `drop` did.
population = population[~population.state.isin(excluded_states)].copy()
donations = donations[~donations.state.isin(excluded_states)].copy()
In [4]:
# Bar colours, applied in column order to the unstacked `is_service` split:
# first entry = non-charitable, second entry = charitable.
colormap = ['indianred', 'steelblue']

# Axis tick formatter: thousands separators, no decimals. Matplotlib passes
# (value, position); only the value is rendered.
thousands_formatter = FuncFormatter(lambda value, pos: '{:,.0f}'.format(value))

# Donor id used for spot-checking individual records.
sample_donor_id = '_1D50SWTKX'
In [5]:
# Preview the first rows recorded for the sample donor.
donations.loc[donations['donor_id'] == sample_donor_id].head()
Out[5]:
In [6]:
# Display the column labels of the donations frame.
donations.columns
Out[6]:
In [7]:
# Two stacked bar charts sharing the year axis: total amount (top) and number
# of distinct donors (bottom), each split by `is_service`.
fig, axes = plt.subplots(2, 1, sharex=True, figsize=(12, 8))
amount_ax, donor_ax = axes

by_year_and_service = donations.groupby(['activity_year', 'is_service'])

# Top panel: summed amount per year.
yearly_amount_split = by_year_and_service.amount.sum().unstack()
yearly_amount_split.plot(
    ax=amount_ax,
    kind='bar',
    color=colormap,
    stacked=True,
    title='How much money is coming in as donations/contributions every year?',
)
amount_ax.yaxis.set_major_formatter(thousands_formatter)
amount_ax.set_ylabel('Total amount')

# Bottom panel: distinct donors per year.
yearly_donor_split = by_year_and_service.donor_id.nunique().unstack()
yearly_donor_split.plot(
    ax=donor_ax,
    kind='bar',
    color=colormap,
    stacked=True,
    title='How many donors are donating/contributing every year to SEF?',
)
donor_ax.set_ylabel('Number of distinct donors')
donor_ax.set_xlabel('Year of activity (data until Nov-2015)')

plt.show()
In [8]:
# Running total of the donated amount within each year, month by month,
# stacked back into one row per (year, month).
#
# DataFrame.cumsum is called directly: np.cumsum(frame, dtype=...) forwards
# `dtype` to the pandas method, which does not implement it (ignored by old
# releases, rejected by releases that validate numpy-style arguments). The
# int64 result requested by the original call is produced with an explicit
# cast instead; it is safe because fillna(0) has removed every NaN.
cumulative_years = (
    donations
    .groupby(['activity_year', 'activity_month'])[['amount']]
    .sum()
    .unstack()
    .fillna(0)  # year/month combinations without donations count as 0
    .cumsum(axis=1)
    .astype('int64')
    .stack()
)
In [9]:
from bokeh.plotting import figure, output_notebook, show, ColumnDataSource
from bokeh.models.formatters import NumeralTickFormatter
import calendar
from bokeh.models import HoverTool
import seaborn as sns
def flatten(lst):
    """Concatenate the items of every sub-iterable in ``lst`` into one flat list.

    Only one level of nesting is removed; the order of items is preserved.
    """
    flat = []
    for sublist in lst:
        flat.extend(sublist)
    return flat
def monthly_amount_multiline_plot(year_month_data, cumulative=True, **kwargs):
    """Build a Bokeh figure with one line per year of monthly amounts.

    Parameters
    ----------
    year_month_data : pandas.DataFrame
        One row per year, with an ``amount`` column group that holds exactly
        twelve columns (one per month, January first).
    cumulative : bool, default True
        When true, each year's line shows the running total across months and
        the y-axis label is suffixed with ``' (cumulative)'``; otherwise the
        monthly values are plotted as given.
    **kwargs
        Forwarded unchanged to ``bokeh.plotting.figure`` (for example
        ``title``).

    Returns
    -------
    The configured Bokeh figure; the caller is responsible for showing it.

    Raises
    ------
    ValueError
        If the ``amount`` group cannot be arranged as (number of years, 12).
    """
    hover = HoverTool(tooltips="\n<div>\n@months, @years\n</div>\n")

    ylabel = 'Total Amount'
    if cumulative:
        ylabel += ' (cumulative)'
        # Use the DataFrame method directly: np.cumsum(frame, dtype=...)
        # forwards a `dtype` argument that pandas does not implement.
        data = year_month_data.cumsum(axis=1)
    else:
        data = year_month_data.copy()

    years = sorted(data.index.get_level_values(0).unique())
    numyears = len(years)
    month_numbers = list(range(1, 13))  # a real list: works on Python 2 and 3
    month_names = calendar.month_abbr[1:]
    palette = sns.color_palette("muted", numyears).as_hex()

    # One list of x positions and one list of amounts per year.
    xs = [list(month_numbers) for _ in years]
    # int64 rather than int32 so large (cumulative) totals cannot wrap around.
    ys = data.amount.values.reshape(numyears, 12).astype('int64').tolist()

    # Flat, point-per-row source for the circle markers and hover tooltips.
    source = ColumnDataSource(
        data=dict(
            x=flatten(xs),
            y=flatten(ys),
            months=month_names * numyears,
            years=np.repeat(years, 12),
        )
    )

    # NOTE(review): x positions are 1-based integers drawn against a
    # categorical month range; confirm this alignment on the installed
    # Bokeh version.
    p = figure(plot_width=600, plot_height=600, x_range=month_names,
               tools=[hover], **kwargs)
    p.multi_line(xs=xs, ys=ys, line_width=2, line_color=palette)
    p.circle('x', 'y', size=10, source=source)
    p.xaxis.axis_label = 'Month'
    p.yaxis.axis_label = ylabel
    p.yaxis.formatter = NumeralTickFormatter(format='0,0')
    return p
In [10]:
# Year x month table of summed donation amounts; year/month combinations
# without any donation are filled with 0.
ymdata = (
    donations
    .groupby(['activity_year', 'activity_month'])[['amount']]
    .sum()
    .unstack()
    .fillna(0)
)

# Show which years the table covers.
ymdata.index.get_level_values(0).unique()
Out[10]:
In [11]:
# Route Bokeh output to the notebook, then build and display the cumulative
# per-year amount chart.
output_notebook()
p = monthly_amount_multiline_plot(
    ymdata,
    cumulative=True,
    title='Amount donated over the years',
)
show(p)