In [18]:
import pandas as pd
# Load the FGCU salary table; the path is relative to the notebook's working directory.
df = pd.read_csv('./FGCU_salary_dataset.csv')
# NOTE: df_m is a second name bound to the same DataFrame object, not a copy.
df_m = df
# Strip the leading '$' with the vectorized .str accessor rather than a per-row
# lambda: a missing salary (NaN) then stays NaN instead of raising
# AttributeError on float.lstrip, and the float64 cast accepts it.
d = df_m['Annual Salary'].str.lstrip('$').astype('float64')
# Display the first rows of the loaded frame as the cell output.
df.head()


Out[18]:
Last Name First Name Annual Salary FGCU Hire Date College/Dept Class Title Working Title Employee Class
0 Gray-Vickrey Margaret $127972.27 07/01/1996 Academic Affairs Administration Assoc. Vice President/Prof. Assc. Provost/Assoc. VP C&I Faculty Admin 10, 11, 12 mo
1 Rogers Hudson $134316.57 08/07/1997 Academic Affairs Administration Assoc. Vice President/Prof. Associate VP & Professor Faculty Admin 10, 11, 12 mo
2 Hart Erika $26650.11 08/03/2001 Academic Affairs Administration Program Asst Program Assistant Support Personnel NonExempt PT
3 Deschene Catherine $63960.27 01/05/2004 Academic Affairs Administration Executive Asst Exec Asst to Provost & VPAA Administrative/Professional
4 Baker Jennifer $90000.00 05/26/2009 Academic Affairs Administration Dir, Academic Support Services Dir., Budgets & Management Svs Administrative/Professional

In [32]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
# Work on the non-missing salaries only: a single NaN would turn both
# np.mean and np.median below into NaN.
salaries = d.dropna().values
with plt.style.context('fivethirtyeight'):
    # Histogram of annual salaries with the mean and median marked.
    plt.hist(salaries, bins=50, alpha=0.4)
    # axvline's ymin/ymax are fractions of the axes height (0..1), not data
    # counts, so a value such as 600 is meaningless there. The defaults
    # (0 and 1) already draw the line across the full height.
    plt.axvline(np.mean(salaries), linewidth=2, color='k', label='mean')
    plt.axvline(x=np.median(salaries), linewidth=2, color='b', linestyle='--', label='median')
    plt.legend(loc='upper right')
    plt.ylabel('Number of Employees')
    plt.xlabel('Annual Salary')
    plt.show()



In [34]:
# Draw 10,000 standard-normal samples from a privately seeded generator so a
# fresh-kernel re-run reproduces the same sample; the global np.random state
# is left untouched.
SEED = 42
data = np.random.RandomState(SEED).normal(loc=0.0, scale=1.0, size=10000)

In [38]:
with plt.style.context('fivethirtyeight'):
    # Histogram of the sample in `data` with its mean and median marked.
    plt.hist(data, bins=50, alpha=0.4)
    # axvline's ymin/ymax are fractions of the axes height (0..1), not data
    # counts, so a value such as 600 is meaningless there. The defaults
    # (0 and 1) already draw the line across the full height.
    plt.axvline(np.mean(data), linewidth=2, color='k', label='mean')
    plt.axvline(x=np.median(data), linewidth=2, color='b', linestyle='--', label='median')
    plt.legend(loc='upper right')
    plt.ylabel('Count')
    plt.xlabel('x')
    plt.show()



In [39]:
# Arithmetic mean of the sample, shown as the cell output.
data.mean()


Out[39]:
0.0033737113299363549

In [40]:
# Median over all elements of the sample, shown as the cell output.
np.median(data, axis=None)


Out[40]:
0.016896725981360865

In [ ]: