In [10]:
import pandas as pd
%matplotlib inline

In [11]:
# Read the recent-grads CSV (path is relative to the notebook's working directory)
# into a DataFrame that the cells below explore.
recent_grads = pd.read_csv(filepath_or_buffer="../data/grad/recent-grads.csv")

In [12]:
# Preview the first five rows to check the columns that were loaded.
recent_grads.head(n=5)


Out[12]:
Rank Major_code Major Major_category Total Sample_size Men Women ShareWomen Employed ... Part_time Full_time_year_round Unemployed Unemployment_rate Median P25th P75th College_jobs Non_college_jobs Low_wage_jobs
0 1 2419 PETROLEUM ENGINEERING Engineering 2339 36 2057 282 0.120564 1976 ... 270 1207 37 0.018381 110000 95000 125000 1534 364 193
1 2 2416 MINING AND MINERAL ENGINEERING Engineering 756 7 679 77 0.101852 640 ... 170 388 85 0.117241 75000 55000 90000 350 257 50
2 3 2415 METALLURGICAL ENGINEERING Engineering 856 3 725 131 0.153037 648 ... 133 340 16 0.024096 73000 50000 105000 456 176 0
3 4 2417 NAVAL ARCHITECTURE AND MARINE ENGINEERING Engineering 1258 16 1123 135 0.107313 758 ... 150 692 40 0.050125 70000 43000 80000 529 102 0
4 5 2405 CHEMICAL ENGINEERING Engineering 32260 289 21239 11021 0.341631 25694 ... 5180 16697 1672 0.061098 65000 50000 75000 18314 4440 972

5 rows × 21 columns


In [13]:
# Histogram of the 'Median' column using pandas' default binning and grid.
recent_grads.hist(column='Median')


Out[13]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x112d6e198>]], dtype=object)

In [14]:
# Same 'Median' histogram, but with 20 bins and the background grid turned off.
recent_grads.hist(
    column='Median',
    bins=20,
    grid=False,
)


Out[14]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x112dd05c0>]], dtype=object)

In [19]:
# Columns to draw, one histogram each.
columns = ['Median', 'Sample_size']

# layout=(2, 1) stacks the two histograms vertically (two rows, one column).
recent_grads.hist(
    column=columns,
    layout=(2, 1),
    grid=False,
)


Out[19]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x112341320>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x1133c0358>]], dtype=object)

In [21]:
import matplotlib.pyplot as plt

# Keep only the value column and the grouping column for the box plot.
sample_size = recent_grads.loc[:, ['Sample_size', 'Major_category']]

# One box per Major_category value.
sample_size.boxplot(by='Major_category')

# Turn the x tick labels vertical (presumably so the category names do not overlap).
plt.xticks(rotation=90)


Out[21]:
(array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16]),
 <a list of 16 Text xticklabel objects>)

In [22]:
import matplotlib.pyplot as plt

# Plot 'Median' against 'Unemployment_rate' and against 'ShareWomen'.
# The two x variables are different quantities, so each gets its own labeled
# panel instead of being overlaid on a single unlabeled axis. The panels share
# the y-axis so 'Median' can still be compared across them.
fig, (ax_unemployment, ax_share_women) = plt.subplots(
    nrows=1, ncols=2, sharey=True, figsize=(12, 5)
)

ax_unemployment.scatter(
    recent_grads['Unemployment_rate'], recent_grads['Median'], color='red'
)
ax_unemployment.set_xlabel('Unemployment_rate')
ax_unemployment.set_ylabel('Median')
ax_unemployment.set_title('Median vs. Unemployment_rate')

ax_share_women.scatter(
    recent_grads['ShareWomen'], recent_grads['Median'], color='blue'
)
ax_share_women.set_xlabel('ShareWomen')
ax_share_women.set_title('Median vs. ShareWomen')

fig.tight_layout()
plt.show()



In [ ]: