In [18]:
import pandas as pd

# Load the salary table; the path is relative to the notebook's working directory.
df = pd.read_csv('./FGCU_salary_dataset.csv')
# NOTE: df_m is another name for the same DataFrame object, not a copy.
df_m = df
# Strip the leading '$' and convert to float. The vectorized .str accessor
# propagates missing entries as NaN, whereas calling x.lstrip inside apply()
# raises AttributeError on a NaN (float) cell.
# Presumably values look like '$12345.67' with no thousands separators,
# since the float conversion would fail otherwise -- confirm against the CSV.
d = df_m['Annual Salary'].str.lstrip('$').astype('float64')
df.head()
Out[18]:
In [32]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
with plt.style.context('fivethirtyeight'):
plt.hist(d.values, bins=50, alpha=0.4)
plt.axvline(np.mean(d.values), ymin=0.0, ymax=600, linewidth=2, color='k', label='mean')
plt.axvline(x=np.median(d.values), ymin=0.0, ymax=600, linewidth=2, color='b', linestyle='--', label='median')
plt.legend(loc='upper right')
plt.ylabel('Number of Employees')
plt.xlabel('Annual Salary')
plt.show()
In [34]:
# Draw 10,000 samples from a standard normal distribution. A locally seeded
# RandomState keeps the sample (and the mean/median/plot derived from it)
# identical on every Restart & Run All without touching NumPy's global RNG.
data = np.random.RandomState(42).normal(loc=0.0, scale=1.0, size=10000)
In [38]:
# Histogram of the normal sample with its mean and median marked.
with plt.style.context('fivethirtyeight'):
    plt.hist(data, bins=50, alpha=0.4)
    # axvline's ymin/ymax are fractions of the axes height in [0, 1], not data
    # counts; the defaults (0 and 1) already draw the line over the full height.
    plt.axvline(np.mean(data), linewidth=2, color='k', label='mean')
    plt.axvline(x=np.median(data), linewidth=2, color='b', linestyle='--', label='median')
    plt.legend(loc='upper right')
    plt.ylabel('Count')
    plt.xlabel('x')
    plt.show()
In [39]:
# Arithmetic mean of the sample, shown as the cell output.
data.mean()
Out[39]:
In [40]:
# Median of the sample over all elements, shown as the cell output.
np.median(data, axis=None)
Out[40]:
In [ ]: