In [1]:
import pandas as pd
import json
import pprint
In [2]:
# read in csv
df = pd.read_csv('FY15__FY16__FY17__FY18_cleaned.csv')
# filter for 2017 and 2018 budget years
df = df[(df['budget_year'] == 'FY18') | (df['budget_year'] == 'FY17')]
# filter for expenses only
df = df[df['account_type'] == 'Expenses']
# select columns of interest
df = df[['budget_year', 'department', 'division', 'account_category', 'amount']]
# drop negative amounts (reimbursments)
df = df[df['amount'] >= 0]
In [3]:
# split amount column into two new columns based on fiscal year
for index,row in df.iterrows():
if row['budget_year'] == 'FY17':
df.loc[index, 'amount2017'] = row['amount']
df.loc[index, 'amount2018'] = 0
else:
df.loc[index, 'amount2018'] = row['amount']
df.loc[index, 'amount2017'] = 0
# drop amount column
df.drop(columns=['amount', 'budget_year'], inplace=True)
# sum up amounts for each account category
df_grouped = df.groupby(['department', 'division', 'account_category']).sum().reset_index()
In [4]:
df_grouped
Out[4]:
In [5]:
# top json structure
post = {
'name': 'Sacramento Budget 2017-2018',
'children': []
}
# sum total for all departments
sum_departments = df_grouped['amount2018'].sum()
# dictionary for holding department sums
sum_departments_dict = {}
# set cutoff for the "other" category groups
# this determines what items go in the "other" category
cutoff = 0.02
#####################
# DEPARTMENTS
#####################
other_departments = {'name': 'Other Departments', 'desc': '', 'children': []}
# loop through all departments
for department in df_grouped['department'].unique():
# filter dataframe based on current department
df_department = df_grouped[df_grouped['department'] == department]
# sum for current department
sum_department = df_department['amount2018'].sum()
# save the department sum for determining "other" group for divisions
sum_departments_dict[department] = sum_department
# either append department to post child if large enough, or append to other departments group
if sum_department / sum_departments > cutoff:
post['children'].append({'name': department, 'desc': '', 'children': []})
else:
other_departments['children'].append({'name': department, 'desc': '', 'children': []})
post['children'].append(other_departments)
In [6]:
#####################
# DIVISIONS
#####################
# loop through departments in post
for department in post['children']:
# Loop through "other" category first
if department['name'] == 'Other Departments':
# loop through departments in other departments group
for other_department in department['children']:
other_divisions = {'name': 'Other Divisions', 'desc': '', 'children': []}
# loop through divisions
for division in df_grouped['division'].unique():
# filter dataframe based on current division
df_division = df_grouped[df_grouped['division'] == division]
# sum for current division
sum_division = df_division['amount2018'].sum()
if other_department['name'] in df_division['department'].unique():
# either append division to the department if large enough, or append to other divisions group for the department
if sum_division / sum_departments_dict[other_department['name']] > cutoff:
other_department['children'].append({'name': division, 'desc': '', 'children': []})
else:
other_divisions['children'].append({'name': division, 'desc': '', 'children': []})
other_department['children'].append(other_divisions)
else:
other_divisions = {'name': 'Other Divisions', 'desc': '', 'children': []}
# loop through divisions
for division in df_grouped['division'].unique():
# filter dataframe based on current division
df_division = df_grouped[df_grouped['division'] == division]
# sum for current division
sum_division = df_division['amount2018'].sum()
if department['name'] in df_division['department'].unique():
if sum_division / sum_departments_dict[department['name']] > cutoff:
department['children'].append({'name': division, 'desc': '', 'children': []})
else:
other_divisions['children'].append({'name': division, 'desc': '', 'children': []})
department['children'].append(other_divisions)
In [7]:
#####################
# ACCOUNTS
#####################
# loop through departments
for department in post['children']:
if department['name'] == 'Other Departments':
# loop through departments in other departments group
for other_department in department['children']:
# loop through divisions
for division in other_department['children']:
if division['name'] == 'Other Divisions':
# loop through other divisions
for other_division in division['children']:
other_accounts = {'name': 'Other Accounts', 'desc': '', 'children': []}
# filter dataframe based on current division
df_division = df_grouped[df_grouped['division'] == other_division['name']]
# sum for current division
sum_division = df_division['amount2018'].sum()
# loop through each account for the division
for index,row in df_division.iterrows():
if row['amount2018'] / sum_division > cutoff:
other_division['children'].append({'name': row['account_category'], 'desc': '', 'amount2018': row['amount2018'], 'amount2017': row['amount2017']})
else:
other_accounts['children'].append({'name': row['account_category'], 'desc': '', 'amount2018': row['amount2018'], 'amount2017': row['amount2017']})
other_division['children'].append(other_accounts)
else:
other_accounts = {'name': 'Other Accounts', 'desc': '', 'children': []}
# filter dataframe based on current division
df_division = df_grouped[df_grouped['division'] == division['name']]
# sum for current division
sum_division = df_division['amount2018'].sum()
# loop through each account for the division
for index,row in df_division.iterrows():
if row['amount2018'] / sum_division > cutoff:
division['children'].append({'name': row['account_category'], 'desc': '', 'amount2018': row['amount2018'], 'amount2017': row['amount2017']})
else:
other_accounts['children'].append({'name': row['account_category'], 'desc': '', 'amount2018': row['amount2018'], 'amount2017': row['amount2017']})
division['children'].append(other_accounts)
else:
# loop through divisions
for division in department['children']:
if division['name'] == 'Other Divisions':
# loop through other divisions
for other_division in division['children']:
other_accounts = {'name': 'Other Accounts', 'desc': '', 'children': []}
# filter dataframe based on current division
df_division = df_grouped[df_grouped['division'] == other_division['name']]
# sum for current division
sum_division = df_division['amount2018'].sum()
# loop through each account for the division
for index,row in df_division.iterrows():
if row['amount2018'] / sum_division > cutoff:
other_division['children'].append({'name': row['account_category'], 'desc': '', 'amount2018': row['amount2018'], 'amount2017': row['amount2017']})
else:
other_accounts['children'].append({'name': row['account_category'], 'desc': '', 'amount2018': row['amount2018'], 'amount2017': row['amount2017']})
other_division['children'].append(other_accounts)
else:
other_accounts = {'name': 'Other Accounts', 'desc': '', 'children': []}
# filter dataframe based on current division
df_division = df_grouped[df_grouped['division'] == division['name']]
# sum for current division
sum_division = df_division['amount2018'].sum()
# loop through each account for the division
for index,row in df_division.iterrows():
if row['amount2018'] / sum_division > cutoff:
division['children'].append({'name': row['account_category'], 'desc': '', 'amount2018': row['amount2018'], 'amount2017': row['amount2017']})
else:
other_accounts['children'].append({'name': row['account_category'], 'desc': '', 'amount2018': row['amount2018'], 'amount2017': row['amount2017']})
division['children'].append(other_accounts)
# save to file
with open('sacbudget_2018.json', 'w', encoding='utf-8') as f:
json.dump(post, f, ensure_ascii=False, indent=4)
In [ ]: