In [ ]:
# To Save back
# http://www.countingcalculi.com/explanations/google_sheets_and_jupyter_notebooks/
# from df2gspread import df2gspread as d2g
# wks_name = 'Jupyter Manipulated Data'
# d2g.upload(df, spreadsheet_key, wks_name, credentials=credentials, row_names=True)
In [ ]:
import pandas as pd
import numpy as np
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display, HTML
# use Seaborn styles
%matplotlib inline
sns.set()
In [ ]:
# Authorise with Google Docs
scope = ['https://spreadsheets.google.com/feeds']
credentials = ServiceAccountCredentials.from_json_keyfile_name('/data/docs/JupyterSheets.json', scope)
gc = gspread.authorize(credentials)
In [ ]:
# Load Spreadsheet
spreadsheet_key = '1KQwNxNVYJ21K1LrDcAddEj1_yxSWlxVpc6xS9z542yY'
book = gc.open_by_key(spreadsheet_key)
uk = pd.DataFrame(book.worksheet("UKTxns").get_all_records())
us = pd.DataFrame(book.worksheet("USTxns").get_all_records())
In [ ]:
def sort_tables(df):
cols = ['Account', 'Date', 'Payee', 'Type', 'Category', 'Memo', 'Outflow', 'Inflow', 'Net']
df['Date'] = pd.to_datetime(df['Date'])
df['Inflow'] = pd.to_numeric(df['Inflow'])
df['Outflow'] = pd.to_numeric(df['Outflow'])
df['Inflow'].fillna(value=0, inplace=True)
df['Outflow'].fillna(value=0, inplace=True)
df['Net'] = df['Inflow'] - df ['Outflow']
#df['Net'] = pd.to_numeric(df['Net'])
df = df[cols]
return df
In [ ]:
def add_date_info(df):
"""
Adds columns for year, month, quarter, week number
"""
df['Year'], df['Month'] = df['Date'].dt.year, df['Date'].dt.month
df['Quarter'], df['Week'] = df['Date'].dt.quarter, df['Date'].dt.week
#df['Year'] = df['Year'].astype('int')
#df['Month'] = df['Month'].astype('int')
#df['Week'] = df['Week'].astype('int')
#df['Quarter'] = df['Quarter'].astype('int')
In [ ]:
sort_tables(uk)
add_date_info(uk)
#cols = ['Account', 'Date', 'Payee', 'Type', 'Category', 'Memo', 'Outflow', 'Inflow', 'Net']
#UK = UK[cols]
In [ ]:
cols = ['Account', 'Date', 'Payee', 'Type', 'Category', 'Memo', 'Outflow', 'Inflow', 'Net']
uk = uk[cols]
add_date_info(uk)
In [ ]:
uk.head()
In [ ]:
def f(row):
expenses = ['Expense', 'NBC', 'Expat']
if row['Type'] in expenses:
val = 'Expense'
else:
val = row['Type']
return val
In [ ]:
filtered = uk.query('Type != "Transfer" & Year != "2014"')
uktype = pd.pivot_table(filtered, index='Type', columns='Year', values='Net',
aggfunc=sum, fill_value=0, margins=True)
uktype
#uktype.loc['Expenses'] = uktype.loc['Expense'] + uktype.loc['Expat'] + uktype.loc['NBC']
#uktype
In [ ]:
sort_tables(us)
cols = ['Account', 'Date', 'Payee', 'Type', 'Category', 'Memo', 'Outflow', 'Inflow', 'Net']
us = us[cols]
add_date_info(us)
us_spend = pd.pivot_table(us.query('Type == "Expense"'), index='Category', columns='Year',
values='Net', aggfunc=sum, fill_value=0)
print(us_spend)
us_spend.plot(kind='bar')
In [ ]:
def us_annual_summary(df, year):
filtered = df[df["Year"] == year]
return pd.pivot_table(filtered.query('Type == "Expense"'), index='Category', columns=['Year', 'Month'],
values='Net', aggfunc=sum, fill_value=0)
In [ ]:
years = [2018, 2017, 2016, 2015]
for year in years:
display(us_annual_summary(us, year))