Q3: Heat map of average Earnings by Sex and Education


In [1]:
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt

In [2]:
# Load the semicolon-delimited extract into a DataFrame.
data = pd.read_csv(
    'extract_medium.csv',
    sep=';',
)

In [3]:
# Peek at the first row to see which columns the extract provides.
data.iloc[:1]


Out[3]:
State House_id Weight House_relation Sex Age Race Marriage Education Ancestry Language Employment_status Traveltime Industry Occupation Hours Weeks Salary Income Earnings
0 Arizona 3399818 18 3 2 14 8 5 4 210 0 0 0 0 0 0 0 0 0 0

In [4]:
# Average Earnings for every (Sex, Education) combination, computed two ways
# that yield the same numbers (a one-column DataFrame and a Series).
# The aggregation is named with the string 'mean' instead of passing np.mean:
# recent pandas releases emit a FutureWarning for the NumPy callable and
# recommend the string alias.
# NOTE(review): these are unweighted means -- the Weight column is not applied;
# confirm that is intended.
table = pd.pivot_table(
    data,
    values='Earnings',
    index=['Sex', 'Education'],
    aggfunc='mean',
)
# Alternative groupby form; bracket access avoids relying on attribute-style
# column lookup.
temp1 = data.groupby(['Sex', 'Education'])['Earnings'].mean()

In [5]:
# Display the mean Earnings for each (Sex, Education) group.
temp1


Out[5]:
Sex  Education
1    0                0.000000
     1             3504.784689
     2              690.859232
     3             5083.671988
     4             4073.961606
     5             9596.498516
     6            11185.848485
     7            11167.638889
     8            19404.356436
     9            24012.275826
     10           28201.210614
     11           28488.347335
     12           35081.001821
     13           53294.264282
     14           70755.173611
     15           94245.204918
     16           61467.676768
2    0                0.000000
     1              947.790323
     2              387.342995
     3             2771.055195
     4             2256.137405
     5             3326.657609
     6             4281.338798
     7             5003.164557
     8             9200.885781
     9            11515.832571
     10           16305.045514
     11           16949.317489
     12           21991.480263
     13           33193.713496
     14           44412.231834
     15           46821.961290
     16           41476.065574
Name: Earnings, dtype: float64

In [6]:
# Turn the grouped means into a plain NumPy array and lay them out as a grid
# with 2 rows (one per Sex group) and one column per Education level.
# np.asarray is used instead of `.values` so this cell can be re-run safely:
# after the first run temp1 is already an ndarray, which has no `.values`.
temp1 = np.asarray(temp1)
# -1 lets NumPy infer the number of columns instead of hard-coding 17.
# NOTE(review): this assumes every (Sex, Education) combination is present and
# sorted by Sex first, as in the displayed groupby output -- confirm for other data.
test = temp1.reshape(2, -1)

In [7]:
# Confirm the grid dimensions before plotting.
np.shape(test)


Out[7]:
(2, 17)

In [8]:
# Heat map of average Earnings: one row per Sex group, one column per
# Education level, with darker red meaning higher average Earnings.
n_sex, n_edu = test.shape

fig, ax = plt.subplots()
mesh = ax.pcolor(test, cmap=plt.cm.Reds, vmin=np.min(test), vmax=np.max(test))

# pcolor draws cell (row i, col j) over the square [j, j+1] x [i, i+1], so the
# ticks are shifted by 0.5 to sit at the cell centres rather than the edges.
ax.set_yticks(np.arange(n_sex) + 0.5)
# One label per tick. Labels start at 1 to match the Sex codes (1, 2) shown in
# the grouped output, rather than 0-based positions.
ax.set_yticklabels(list(range(1, n_sex + 1)))
ax.set_xticks(np.arange(n_edu) + 0.5)
# Education codes in the grouped output start at 0.
ax.set_xticklabels(list(range(n_edu)))

ax.set_xlabel('Education')
ax.set_ylabel('Sex')
ax.set_title('Heat map of average Earnings Gender Vs Education')

# A colorbar makes the colour scale readable in absolute terms.
cbar = fig.colorbar(mesh, ax=ax)
cbar.set_label('Average Earnings')

plt.show()
plt.close(fig)


/home/aqeel/.local/lib/python2.7/site-packages/matplotlib/collections.py:590: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
  if self._edgecolors == str('face'):

In [ ]: