In [1]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
import statsmodels.api as sm
from math import sqrt

%matplotlib inline

# Raw CSV lives in the SafeBoss GitHub repository; the URL is assembled from
# the repository root plus the file name.
base_url = 'https://raw.githubusercontent.com/aisaacso/SafeBoss/master/'
empl_url = base_url + 'Employee_Earnings_Report_2014.csv'

# Read the report (NAME and TITLE are forced to str) and, in the same
# expression, shorten the two column labels that later cells reference.
employee_df = pd.read_csv(
    empl_url,
    dtype={'NAME': str, 'TITLE': str},
).rename(
    columns={
        'DEPARTMENT NAME': 'DeptName',
        'TOTAL EARNINGS': 'TotEarn',
    }
)

def currency_to_float(val):
    """Convert a currency-formatted value such as ``'$1,234.56'`` to a float.

    Parameters
    ----------
    val : str, number or None
        Usually a string with an optional ``$`` sign, optional thousands
        separators (``,``) and optional surrounding whitespace. A value
        wrapped in parentheses, e.g. ``'($12.00)'``, is read as negative
        (accounting notation). Values that are already numeric (including
        NaN, which is how pandas represents a missing cell) are passed
        through ``float()`` unchanged.

    Returns
    -------
    float
        The numeric amount. ``None`` and blank strings yield NaN.

    Raises
    ------
    ValueError
        If the text left after stripping currency formatting is not a number.
    """
    if val is None:
        return float('nan')

    try:
        text = val.strip()
    except AttributeError:
        # Not string-like: already a number (or NaN from a missing cell).
        return float(val)

    # Accounting-style negatives: "($12.00)" means -12.00.
    negative = text.startswith('(') and text.endswith(')')
    if negative:
        text = text[1:-1]

    text = text.replace('$', '').replace(',', '').strip()
    if not text:
        # A blank cell carries no amount; report it as missing, not as 0.
        return float('nan')

    amount = float(text)
    return -amount if negative else amount

# Columns at positions 3 through 10 are the ones converted below —
# presumably the dollar-amount columns of the report; confirm against the
# CSV header if the file layout ever changes.
curr_cols = employee_df.columns[3:11].tolist()

# Replace each selected column with its element-wise float conversion.
for c in curr_cols:
    employee_df[c] = employee_df[c].apply(currency_to_float)

In [2]:
# Which department gets the most overtime?

# Number of departments shown in the chart.
# NOTE(review): the variable below is called `top_ten_ot`, but 15 rows have
# always been kept; the value is left at 15 so the chart does not change.
N_TOP_DEPTS = 15

# Sum only the numeric columns per department. Without an explicit `values`
# list, pivot_table would also try to "sum" the string columns, which older
# pandas silently dropped but current pandas no longer does.
numeric_cols = employee_df.select_dtypes(include=['number']).columns.tolist()
ot_pivot = pd.pivot_table(
    employee_df,
    index='DeptName',
    values=numeric_cols,
    aggfunc='sum',
)

# Departments ordered from most to least total overtime, truncated for plotting.
top_ten_ot = ot_pivot.sort_values('OVERTIME', axis=0, ascending=False).head(N_TOP_DEPTS)

# Bar chart of total overtime per department; label the axes so the figure
# can be read on its own.
ax = top_ten_ot['OVERTIME'].plot(kind='bar', title='Depts with the Most Overtime')
ax.set_xlabel('Department')
ax.set_ylabel('Total overtime ($)')
ax  # keep the Axes as the cell's value


Out[2]:
<matplotlib.axes._subplots.AxesSubplot at 0x103baced0>

In [ ]: