In [17]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
In [18]:
'''
Read the CSV of percentages of bachelor's degrees granted to women in the USA
into a DataFrame, then print its first row as a quick look at the columns.
'''
women_degrees = pd.read_csv(
    filepath_or_buffer='percent-bachelors-degrees-women-usa.csv',
)
# Positional row 0, all columns.
print(women_degrees.iloc[0, :])
In [24]:
'''
Scatter plot of the percentage of Biology degrees granted to women against year.
Year is placed on the x-axis, matching the line charts later in this notebook.
Author's reading of the plot: there is a strong positive correlation between
the two, i.e. the percentage of Biology degrees granted to women increased
as time passed.
'''
fig, ax = plt.subplots()
ax.set_xlabel('Year')
ax.set_ylabel('Biology degrees granted to women (%)')
# Kept as the final expression so the cell still echoes the scatter artist.
ax.scatter(x=women_degrees['Year'], y=women_degrees['Biology'])
Out[24]:
In [26]:
'''
Line chart of the 'Biology' column against the 'Year' column
(the same two columns as the scatter plot in the previous cell).
'''
plt.plot(
    women_degrees.loc[:, 'Year'],
    women_degrees.loc[:, 'Biology'],
)
Out[26]:
In [47]:
'''
Show the percentage of Biology degrees granted to women and to men on one plot.
The men's series is computed as 100 minus the women's percentage.
'''
plt.plot(
    women_degrees['Year'],
    women_degrees['Biology'],
    color='blue',
    label='Women',
)
plt.plot(
    women_degrees['Year'],
    100 - women_degrees['Biology'],
    color='green',
    label='Men',
)
plt.title('Percentage of Biology Degrees Awarded By Gender')
plt.legend(loc='upper right')
# One x tick every 5 years from 1970 up to (not including) 2015.
locs, labels = plt.xticks(np.arange(start=1970, stop=2015, step=5))
In [54]:
'''
Same two-gender Biology plot as before, with a better data-ink ratio:
the tick marks and the axes spines are removed.
'''
plt.plot(women_degrees['Year'], women_degrees['Biology'], c='blue', label='Women')
plt.plot(women_degrees['Year'], 100 - women_degrees['Biology'], c='green', label='Men')
plt.legend(loc='upper right')
plt.title('Percentage of Biology Degrees Awarded By Gender')
# One x tick every 5 years from 1970 up to (not including) 2015.
locs, labels = plt.xticks(np.arange(1970, 2015, 5))
ax = plt.gca()
# Remove tick marks. tick_params expects booleans here; the strings 'on'/'off'
# are deprecated since Matplotlib 2.2 and a non-empty string such as 'off'
# may be treated as True, which would leave the ticks visible.
ax.tick_params(bottom=False, left=False)
# Remove spines (the border lines around the plotting area).
for spine in ax.spines.values():
    spine.set_visible(False)
In [58]:
'''
Plot the gender gap for four STEM degree categories on a 2x2 grid of subplots.
Each subplot shows the women's percentage and the men's percentage
(100 minus the women's percentage) against year.
'''
major_cats = ['Biology', 'Computer Science', 'Engineering', 'Math and Statistics']
fig = plt.figure(figsize=(12, 12))
for sp, major in enumerate(major_cats):
    # Subplot positions are 1-based, hence sp + 1.
    ax = fig.add_subplot(2, 2, sp + 1)
    ax.plot(women_degrees['Year'], women_degrees[major], c='blue', label='Women')
    ax.plot(women_degrees['Year'], 100 - women_degrees[major], c='green', label='Men')
    # Hide every spine of this subplot.
    for spine in ax.spines.values():
        spine.set_visible(False)
    # tick_params expects booleans; the strings 'on'/'off' are deprecated since
    # Matplotlib 2.2 and a non-empty string such as 'off' may be treated as
    # True, which would leave the ticks visible.
    ax.tick_params(bottom=False, left=False, top=False, right=False)
    ax.set_title(major)
    # Same axis limits on every subplot so the panels are directly comparable.
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)

# Calling pyplot.legend() here will add the legend to the last subplot that was created.
plt.legend(loc='upper right')
plt.show()
In [ ]: