In [17]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

In [18]:
'''
Read the women's bachelor's-degree CSV into a DataFrame and print
its first row as a quick look at the available columns.
'''
csv_path = 'percent-bachelors-degrees-women-usa.csv'
women_degrees = pd.read_csv(csv_path)
first_row = women_degrees.iloc[0]
print(first_row)


Year                             1970.000000
Agriculture                         4.229798
Architecture                       11.921005
Art and Performance                59.700000
Biology                            29.088363
Business                            9.064439
Communications and Journalism      35.300000
Computer Science                   13.600000
Education                          74.535328
Engineering                         0.800000
English                            65.570923
Foreign Languages                  73.800000
Health Professions                 77.100000
Math and Statistics                38.000000
Physical Sciences                  13.800000
Psychology                         44.400000
Public Administration              68.400000
Social Sciences and History        36.800000
Name: 0, dtype: float64

In [24]:
'''
Scatter plot of the percentage of biology degrees granted to women
against year.

Year goes on the x-axis so that time reads left to right, matching the
line chart of the same data in the following cell.

We can see a strong positive correlation between the two: as time
passed, the percentage of biology degrees granted to women increased.
'''

fig, ax = plt.subplots()

# Label the axes before plotting so the scatter call stays the cell's
# last expression (and therefore its displayed output).
ax.set_xlabel('Year')
ax.set_ylabel('Biology degrees granted to women (%)')

ax.scatter(x=women_degrees['Year'], y=women_degrees['Biology'])


Out[24]:
<matplotlib.collections.PathCollection at 0x7f02b85b4fd0>

In [26]:
'''
The same year vs. biology data, drawn as a line chart instead.
'''
years = women_degrees['Year']
biology_share = women_degrees['Biology']
plt.plot(years, biology_share)


Out[26]:
[<matplotlib.lines.Line2D at 0x7f02b84e2400>]

In [47]:
'''
Show the percentage of biology degrees granted to women and to men
together on one plot.
'''
years = women_degrees['Year']
women_share = women_degrees['Biology']
# The dataset only stores the women's percentage; the men's line is its
# complement to 100.
men_share = 100 - women_share

for share, colour, gender in ((women_share, 'blue', 'Women'),
                              (men_share, 'green', 'Men')):
    plt.plot(years, share, c=colour, label=gender)

plt.title('Percentage of Biology Degrees Awarded By Gender')
plt.legend(loc='upper right')
locs, labels = plt.xticks(np.arange(1970, 2015, 5))



In [54]:
'''
Improve the data-ink ratio of the biology gender chart by removing the
spines and the tick marks.
'''

plt.plot(women_degrees['Year'], women_degrees['Biology'], c='blue', label='Women')
plt.plot(women_degrees['Year'], 100 - women_degrees['Biology'], c='green', label='Men')
plt.legend(loc='upper right')
plt.title('Percentage of Biology Degrees Awarded By Gender')
locs, labels = plt.xticks(np.arange(1970, 2015, 5))


ax = plt.gca()
# Remove tick marks. Booleans are required here: the legacy 'off' string
# was deprecated in matplotlib 2.2 and is no longer interpreted as False,
# so passing it leaves the tick marks drawn.
ax.tick_params(bottom=False, left=False)
# Remove spines (the key of each entry is not needed, only the Spine).
for spine in ax.spines.values():
    spine.set_visible(False)



In [58]:
'''
Plot the gender gap for four STEM degree categories, one subplot per
category on a 2x2 grid.
'''

major_cats = ['Biology', 'Computer Science', 'Engineering', 'Math and Statistics']
fig = plt.figure(figsize=(12, 12))

for sp, major in enumerate(major_cats):
    # Subplot positions are 1-based, hence sp + 1.
    ax = fig.add_subplot(2, 2, sp + 1)
    ax.plot(women_degrees['Year'], women_degrees[major], c='blue', label='Women')
    ax.plot(women_degrees['Year'], 100 - women_degrees[major], c='green', label='Men')
    for spine in ax.spines.values():
        spine.set_visible(False)
    # Booleans are required here: the legacy 'off' string was deprecated in
    # matplotlib 2.2 and is no longer interpreted as False, so passing it
    # leaves the tick marks drawn.
    ax.tick_params(bottom=False, left=False, top=False, right=False)
    ax.set_title(major)
    # Identical limits on every panel keep the four charts comparable.
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)


# Calling pyplot.legend() here will add the legend to the last subplot that was created.
plt.legend(loc='upper right')
plt.show()



In [ ]: