In [1]:

    
import pandas as pd
import json
import pprint

Data Cleaning

Combined Divisions (Format: 2017 = 2018)

Police
- Assumed Field Services Division = Office of Specialized Services Division
Utilities
- Business & Integrated Planning Division = Business Services Division
Public Works
- Marina Division = Parking Services Division
Debt Service
- 2013 Debt Service Water Revenues = 2013 Water Revenue Bonds
Fire
- Emergency Operations Division = Operations Division
- Community Risk Reduction Division = Risk Reduction Division
Information Technology
- 311 Division = Sacramento City 311 Division
City Manager
- City Manager Division = Office of the City Manager Division
- Media & Communications Division = Office of Media & Communications Division

Import csv

Import the CSV, select columns and rows of interest



In [2]:

    
# read in csv
df = pd.read_csv('FY15__FY16__FY17__FY18_cleaned.csv')

# filter for 2017 and 2018 budget years
df = df[(df['budget_year'] == 'FY18') | (df['budget_year'] == 'FY17')]

# filter for expenses only
df = df[df['account_type'] == 'Expenses']

# select columns of interest
df = df[['budget_year', 'department', 'division', 'account_category', 'amount']]

# drop negative amounts (reimbursments)
df = df[df['amount'] >= 0]



In [3]:

    
# split amount column into two new columns based on fiscal year
for index,row in df.iterrows():
    if row['budget_year'] == 'FY17':
        df.loc[index, 'amount2017'] = row['amount']
        df.loc[index, 'amount2018'] = 0
    else:
        df.loc[index, 'amount2018'] = row['amount']
        df.loc[index, 'amount2017'] = 0

# drop amount column
df.drop(columns=['amount', 'budget_year'], inplace=True)

# sum up amounts for each account category
df_grouped = df.groupby(['department', 'division', 'account_category']).sum().reset_index()



In [4]:

    
df_grouped









    Out[4]:







  
    
      
      department
      division
      account_category
      amount2018
      amount2017
    
  
  
    
      0
      City Attorney
      City Attorney Division
      Books and Periodicals
      63382.0
      63382.0
    
    
      1
      City Attorney
      City Attorney Division
      Employee Benefits
      1797391.0
      1663850.0
    
    
      2
      City Attorney
      City Attorney Division
      Employer Paid Taxes
      634093.0
      433415.0
    
    
      3
      City Attorney
      City Attorney Division
      Energy and Fuel
      253.0
      440.0
    
    
      4
      City Attorney
      City Attorney Division
      Food
      2483.0
      2483.0
    
    
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      1220
      Utilities
      Operations & Maintenance Division
      Purchased Prof and Tech Svcs
      2035553.0
      2316218.0
    
    
      1221
      Utilities
      Operations & Maintenance Division
      Purchased Property Services
      5704458.0
      5818690.0
    
    
      1222
      Utilities
      Operations & Maintenance Division
      Salaries
      25759278.0
      25324433.0
    
    
      1223
      Utilities
      Operations & Maintenance Division
      Service & Supply Adjustments
      128280.0
      120000.0
    
    
      1224
      Utilities
      Operations & Maintenance Division
      Transfers 1
      1831671.0
      1860173.0
    
  

1225 rows × 5 columns

Create json

Departments



In [5]:

    
# top json structure
post = {
    'name': 'Sacramento Budget 2017-2018',
    'children': []
}

# sum total for all departments
sum_departments = df_grouped['amount2018'].sum()

# dictionary for holding department sums
sum_departments_dict = {}

# set cutoff for the "other" category groups
# this determines what items go in the "other" category
cutoff = 0.02

#####################
# DEPARTMENTS
#####################

other_departments = {'name': 'Other Departments', 'desc': '', 'children': []}

# loop through all departments
for department in df_grouped['department'].unique():
    
    # filter dataframe based on current department
    df_department = df_grouped[df_grouped['department'] == department]
    
    # sum for current department
    sum_department = df_department['amount2018'].sum()
    
    # save the department sum for determining "other" group for divisions
    sum_departments_dict[department] = sum_department
    
    # either append department to post child if large enough, or append to other departments group
    if sum_department / sum_departments > cutoff:
        post['children'].append({'name': department, 'desc': '', 'children': []})
    else:
        other_departments['children'].append({'name': department, 'desc': '', 'children': []})
    
post['children'].append(other_departments)

Divisions



In [6]:

    
#####################
# DIVISIONS
#####################

# loop through departments in post
for department in post['children']:
    
    # Loop through "other" category first
    if department['name'] == 'Other Departments':

        # loop through departments in other departments group
        for other_department in department['children']:

            other_divisions = {'name': 'Other Divisions', 'desc': '', 'children': []}
            
            # loop through divisions
            for division in df_grouped['division'].unique():
            
                # filter dataframe based on current division
                df_division = df_grouped[df_grouped['division'] == division]
                
                # sum for current division
                sum_division = df_division['amount2018'].sum()
                
                if other_department['name'] in df_division['department'].unique():
                    
                    # either append division to the department if large enough, or append to other divisions group for the department
                    if sum_division / sum_departments_dict[other_department['name']] > cutoff:
                        other_department['children'].append({'name': division, 'desc': '', 'children': []})
                    else:
                        other_divisions['children'].append({'name': division, 'desc': '', 'children': []})      
    
            other_department['children'].append(other_divisions)
    
    else:
        
        other_divisions = {'name': 'Other Divisions', 'desc': '', 'children': []}
        
        # loop through divisions
        for division in df_grouped['division'].unique():
            
            # filter dataframe based on current division
            df_division = df_grouped[df_grouped['division'] == division]

            # sum for current division
            sum_division = df_division['amount2018'].sum()

            if department['name'] in df_division['department'].unique():

                if sum_division / sum_departments_dict[department['name']] > cutoff:
                    department['children'].append({'name': division, 'desc': '', 'children': []})
                else:
                    other_divisions['children'].append({'name': division, 'desc': '', 'children': []})
      
        department['children'].append(other_divisions)

Accounts



In [7]:

    
#####################
# ACCOUNTS
#####################

# loop through departments
for department in post['children']:
    
    if department['name'] == 'Other Departments':
            
        # loop through departments in other departments group
        for other_department in department['children']:
            
            # loop through divisions
            for division in other_department['children']:
                
                if division['name'] == 'Other Divisions':

                    # loop through other divisions
                    for other_division in division['children']:

                        other_accounts = {'name': 'Other Accounts', 'desc': '', 'children': []}

                        # filter dataframe based on current division
                        df_division = df_grouped[df_grouped['division'] == other_division['name']]

                        # sum for current division
                        sum_division = df_division['amount2018'].sum()

                        # loop through each account for the division
                        for index,row in df_division.iterrows():

                            if row['amount2018'] / sum_division > cutoff:
                                other_division['children'].append({'name': row['account_category'], 'desc': '', 'amount2018': row['amount2018'], 'amount2017': row['amount2017']})
                            else:
                                other_accounts['children'].append({'name': row['account_category'], 'desc': '', 'amount2018': row['amount2018'], 'amount2017': row['amount2017']})

                        other_division['children'].append(other_accounts)

                else:
                    other_accounts = {'name': 'Other Accounts', 'desc': '', 'children': []}

                    # filter dataframe based on current division
                    df_division = df_grouped[df_grouped['division'] == division['name']]

                    # sum for current division
                    sum_division = df_division['amount2018'].sum()

                    # loop through each account for the division
                    for index,row in df_division.iterrows():

                        if row['amount2018'] / sum_division > cutoff:
                            division['children'].append({'name': row['account_category'], 'desc': '', 'amount2018': row['amount2018'], 'amount2017': row['amount2017']})
                        else:
                            other_accounts['children'].append({'name': row['account_category'], 'desc': '', 'amount2018': row['amount2018'], 'amount2017': row['amount2017']})

                    division['children'].append(other_accounts)

    
    else:
    
        # loop through divisions
        for division in department['children']:

            if division['name'] == 'Other Divisions':

                # loop through other divisions
                for other_division in division['children']:

                    other_accounts = {'name': 'Other Accounts', 'desc': '', 'children': []}

                    # filter dataframe based on current division
                    df_division = df_grouped[df_grouped['division'] == other_division['name']]

                    # sum for current division
                    sum_division = df_division['amount2018'].sum()

                    # loop through each account for the division
                    for index,row in df_division.iterrows():

                        if row['amount2018'] / sum_division > cutoff:
                            other_division['children'].append({'name': row['account_category'], 'desc': '', 'amount2018': row['amount2018'], 'amount2017': row['amount2017']})
                        else:
                            other_accounts['children'].append({'name': row['account_category'], 'desc': '', 'amount2018': row['amount2018'], 'amount2017': row['amount2017']})

                    other_division['children'].append(other_accounts)

            else:
                other_accounts = {'name': 'Other Accounts', 'desc': '', 'children': []}

                # filter dataframe based on current division
                df_division = df_grouped[df_grouped['division'] == division['name']]

                # sum for current division
                sum_division = df_division['amount2018'].sum()

                # loop through each account for the division
                for index,row in df_division.iterrows():

                    if row['amount2018'] / sum_division > cutoff:
                        division['children'].append({'name': row['account_category'], 'desc': '', 'amount2018': row['amount2018'], 'amount2017': row['amount2017']})
                    else:
                        other_accounts['children'].append({'name': row['account_category'], 'desc': '', 'amount2018': row['amount2018'], 'amount2017': row['amount2017']})

                division['children'].append(other_accounts)

# save to file
with open('sacbudget_2018.json', 'w', encoding='utf-8') as f:
    json.dump(post, f, ensure_ascii=False, indent=4)









    



C:\Users\Smili\anaconda3\lib\site-packages\ipykernel_launcher.py:80: RuntimeWarning: invalid value encountered in double_scalars
C:\Users\Smili\anaconda3\lib\site-packages\ipykernel_launcher.py:32: RuntimeWarning: invalid value encountered in double_scalars

TO-DO

Some category text is still cut off: adjust chart size, adjust "Other Category" cutoff, or give a straight cutoff to display
Fix tooltip position
Correct department/division/account names between 2017 and 2018 for %change calculation
Add descriptions
Likely easier to do in node.js?
Investigate highcharts treemap drilldown: https://www.highcharts.com/demo/treemap-large-dataset



In [ ]:

	department	division	account_category	amount2018	amount2017
0	City Attorney	City Attorney Division	Books and Periodicals	63382.0	63382.0
1	City Attorney	City Attorney Division	Employee Benefits	1797391.0	1663850.0
2	City Attorney	City Attorney Division	Employer Paid Taxes	634093.0	433415.0
3	City Attorney	City Attorney Division	Energy and Fuel	253.0	440.0
4	City Attorney	City Attorney Division	Food	2483.0	2483.0
...	...	...	...	...	...
1220	Utilities	Operations & Maintenance Division	Purchased Prof and Tech Svcs	2035553.0	2316218.0
1221	Utilities	Operations & Maintenance Division	Purchased Property Services	5704458.0	5818690.0
1222	Utilities	Operations & Maintenance Division	Salaries	25759278.0	25324433.0
1223	Utilities	Operations & Maintenance Division	Service & Supply Adjustments	128280.0	120000.0
1224	Utilities	Operations & Maintenance Division	Transfers 1	1831671.0	1860173.0