In [1]:
import pandas as pd
import json
import pprint

Data Cleaning

Combined Divisions (Format: 2017 = 2018)

  • Police
    • Assumed Field Services Division = Office of Specialized Services Division
  • Utilities
    • Business & Integrated Planning Division = Business Services Division
  • Public Works
    • Marina Division = Parking Services Division
  • Debt Service
    • 2013 Debt Service Water Revenues = 2013 Water Revenue Bonds
  • Fire
    • Emergency Operations Division = Operations Division
    • Community Risk Reduction Division = Risk Reduction Division
  • Information Technology
    • 311 Division = Sacramento City 311 Division
  • City Manager
    • City Manager Division = Office of the City Manager Division
    • Media & Communications Division = Office of Media & Communications Division

Import csv

Import the CSV, select columns and rows of interest


In [2]:
# read in csv
df = pd.read_csv('FY15__FY16__FY17__FY18_cleaned.csv')

# filter for 2017 and 2018 budget years
df = df[(df['budget_year'] == 'FY18') | (df['budget_year'] == 'FY17')]

# filter for expenses only
df = df[df['account_type'] == 'Expenses']

# select columns of interest
df = df[['budget_year', 'department', 'division', 'account_category', 'amount']]

# drop negative amounts (reimbursments)
df = df[df['amount'] >= 0]

In [3]:
# split amount column into two new columns based on fiscal year
for index,row in df.iterrows():
    if row['budget_year'] == 'FY17':
        df.loc[index, 'amount2017'] = row['amount']
        df.loc[index, 'amount2018'] = 0
    else:
        df.loc[index, 'amount2018'] = row['amount']
        df.loc[index, 'amount2017'] = 0

# drop amount column
df.drop(columns=['amount', 'budget_year'], inplace=True)

# sum up amounts for each account category
df_grouped = df.groupby(['department', 'division', 'account_category']).sum().reset_index()

In [4]:
df_grouped


Out[4]:
department division account_category amount2018 amount2017
0 City Attorney City Attorney Division Books and Periodicals 63382.0 63382.0
1 City Attorney City Attorney Division Employee Benefits 1797391.0 1663850.0
2 City Attorney City Attorney Division Employer Paid Taxes 634093.0 433415.0
3 City Attorney City Attorney Division Energy and Fuel 253.0 440.0
4 City Attorney City Attorney Division Food 2483.0 2483.0
... ... ... ... ... ...
1220 Utilities Operations & Maintenance Division Purchased Prof and Tech Svcs 2035553.0 2316218.0
1221 Utilities Operations & Maintenance Division Purchased Property Services 5704458.0 5818690.0
1222 Utilities Operations & Maintenance Division Salaries 25759278.0 25324433.0
1223 Utilities Operations & Maintenance Division Service & Supply Adjustments 128280.0 120000.0
1224 Utilities Operations & Maintenance Division Transfers 1 1831671.0 1860173.0

1225 rows × 5 columns

Create json

Departments


In [5]:
# top json structure
post = {
    'name': 'Sacramento Budget 2017-2018',
    'children': []
}

# sum total for all departments
sum_departments = df_grouped['amount2018'].sum()

# dictionary for holding department sums
sum_departments_dict = {}

# set cutoff for the "other" category groups
# this determines what items go in the "other" category
cutoff = 0.02

#####################
# DEPARTMENTS
#####################

other_departments = {'name': 'Other Departments', 'desc': '', 'children': []}

# loop through all departments
for department in df_grouped['department'].unique():
    
    # filter dataframe based on current department
    df_department = df_grouped[df_grouped['department'] == department]
    
    # sum for current department
    sum_department = df_department['amount2018'].sum()
    
    # save the department sum for determining "other" group for divisions
    sum_departments_dict[department] = sum_department
    
    # either append department to post child if large enough, or append to other departments group
    if sum_department / sum_departments > cutoff:
        post['children'].append({'name': department, 'desc': '', 'children': []})
    else:
        other_departments['children'].append({'name': department, 'desc': '', 'children': []})
    
post['children'].append(other_departments)

Divisions


In [6]:
#####################
# DIVISIONS
#####################

# loop through departments in post
for department in post['children']:
    
    # Loop through "other" category first
    if department['name'] == 'Other Departments':

        # loop through departments in other departments group
        for other_department in department['children']:

            other_divisions = {'name': 'Other Divisions', 'desc': '', 'children': []}
            
            # loop through divisions
            for division in df_grouped['division'].unique():
            
                # filter dataframe based on current division
                df_division = df_grouped[df_grouped['division'] == division]
                
                # sum for current division
                sum_division = df_division['amount2018'].sum()
                
                if other_department['name'] in df_division['department'].unique():
                    
                    # either append division to the department if large enough, or append to other divisions group for the department
                    if sum_division / sum_departments_dict[other_department['name']] > cutoff:
                        other_department['children'].append({'name': division, 'desc': '', 'children': []})
                    else:
                        other_divisions['children'].append({'name': division, 'desc': '', 'children': []})      
    
            other_department['children'].append(other_divisions)
    
    else:
        
        other_divisions = {'name': 'Other Divisions', 'desc': '', 'children': []}
        
        # loop through divisions
        for division in df_grouped['division'].unique():
            
            # filter dataframe based on current division
            df_division = df_grouped[df_grouped['division'] == division]

            # sum for current division
            sum_division = df_division['amount2018'].sum()

            if department['name'] in df_division['department'].unique():

                if sum_division / sum_departments_dict[department['name']] > cutoff:
                    department['children'].append({'name': division, 'desc': '', 'children': []})
                else:
                    other_divisions['children'].append({'name': division, 'desc': '', 'children': []})
      
        department['children'].append(other_divisions)

Accounts


In [7]:
#####################
# ACCOUNTS
#####################

# loop through departments
for department in post['children']:
    
    if department['name'] == 'Other Departments':
            
        # loop through departments in other departments group
        for other_department in department['children']:
            
            # loop through divisions
            for division in other_department['children']:
                
                if division['name'] == 'Other Divisions':

                    # loop through other divisions
                    for other_division in division['children']:

                        other_accounts = {'name': 'Other Accounts', 'desc': '', 'children': []}

                        # filter dataframe based on current division
                        df_division = df_grouped[df_grouped['division'] == other_division['name']]

                        # sum for current division
                        sum_division = df_division['amount2018'].sum()

                        # loop through each account for the division
                        for index,row in df_division.iterrows():

                            if row['amount2018'] / sum_division > cutoff:
                                other_division['children'].append({'name': row['account_category'], 'desc': '', 'amount2018': row['amount2018'], 'amount2017': row['amount2017']})
                            else:
                                other_accounts['children'].append({'name': row['account_category'], 'desc': '', 'amount2018': row['amount2018'], 'amount2017': row['amount2017']})

                        other_division['children'].append(other_accounts)

                else:
                    other_accounts = {'name': 'Other Accounts', 'desc': '', 'children': []}

                    # filter dataframe based on current division
                    df_division = df_grouped[df_grouped['division'] == division['name']]

                    # sum for current division
                    sum_division = df_division['amount2018'].sum()

                    # loop through each account for the division
                    for index,row in df_division.iterrows():

                        if row['amount2018'] / sum_division > cutoff:
                            division['children'].append({'name': row['account_category'], 'desc': '', 'amount2018': row['amount2018'], 'amount2017': row['amount2017']})
                        else:
                            other_accounts['children'].append({'name': row['account_category'], 'desc': '', 'amount2018': row['amount2018'], 'amount2017': row['amount2017']})

                    division['children'].append(other_accounts)

    
    else:
    
        # loop through divisions
        for division in department['children']:

            if division['name'] == 'Other Divisions':

                # loop through other divisions
                for other_division in division['children']:

                    other_accounts = {'name': 'Other Accounts', 'desc': '', 'children': []}

                    # filter dataframe based on current division
                    df_division = df_grouped[df_grouped['division'] == other_division['name']]

                    # sum for current division
                    sum_division = df_division['amount2018'].sum()

                    # loop through each account for the division
                    for index,row in df_division.iterrows():

                        if row['amount2018'] / sum_division > cutoff:
                            other_division['children'].append({'name': row['account_category'], 'desc': '', 'amount2018': row['amount2018'], 'amount2017': row['amount2017']})
                        else:
                            other_accounts['children'].append({'name': row['account_category'], 'desc': '', 'amount2018': row['amount2018'], 'amount2017': row['amount2017']})

                    other_division['children'].append(other_accounts)

            else:
                other_accounts = {'name': 'Other Accounts', 'desc': '', 'children': []}

                # filter dataframe based on current division
                df_division = df_grouped[df_grouped['division'] == division['name']]

                # sum for current division
                sum_division = df_division['amount2018'].sum()

                # loop through each account for the division
                for index,row in df_division.iterrows():

                    if row['amount2018'] / sum_division > cutoff:
                        division['children'].append({'name': row['account_category'], 'desc': '', 'amount2018': row['amount2018'], 'amount2017': row['amount2017']})
                    else:
                        other_accounts['children'].append({'name': row['account_category'], 'desc': '', 'amount2018': row['amount2018'], 'amount2017': row['amount2017']})

                division['children'].append(other_accounts)

# save to file
with open('sacbudget_2018.json', 'w', encoding='utf-8') as f:
    json.dump(post, f, ensure_ascii=False, indent=4)


C:\Users\Smili\anaconda3\lib\site-packages\ipykernel_launcher.py:80: RuntimeWarning: invalid value encountered in double_scalars
C:\Users\Smili\anaconda3\lib\site-packages\ipykernel_launcher.py:32: RuntimeWarning: invalid value encountered in double_scalars

TO-DO

  • Some category text is still cut off: adjust chart size, adjust "Other Category" cutoff, or give a straight cutoff to display
  • Fix tooltip position
  • Correct department/division/account names between 2017 and 2018 for %change calculation
  • Add descriptions
  • Likely easier to do in node.js?
  • Investigate highcharts treemap drilldown: https://www.highcharts.com/demo/treemap-large-dataset

In [ ]: