In [17]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# Load the full survey response table.
# - sheet_name=1 selects the second sheet of the workbook (0-based position).
#   The keyword used to be spelled `sheetname`; that spelling was renamed to
#   `sheet_name` and later removed from pandas, so the old call raises TypeError
#   on current versions.
# - skiprows=1 skips the first row of the sheet before the header is read.
# - keep_default_na=False stops pandas from converting answers such as 'N/A'
#   into NaN. NaN values are dropped by value_counts() and similar functions,
#   so without this flag every 'N/A' response would silently disappear from the
#   tallies below; with it they stay as ordinary strings and are counted.
df=pd.read_excel('first survey - final (Responses).xlsx',sheet_name=1,skiprows=1,keep_default_na=False)
#df=pd.read_excel('survey_test.xls',sheet_name=1)
# Column labels of the response table (one per survey question).
c=df.columns
# Select the subset of the questions that are about wages.
# NOTE(review): this is a positional slice (columns 13..17); it presumably matches
# the current layout of the spreadsheet -- re-check if columns are added or moved.
cwages=c[13:18]
print(cwages)
In [2]:
# Tally how many respondents gave each answer to the first wage question.
# An earlier version relabelled null entries by hand before counting:
#   df[cwages[0]][df[cwages[0]].isnull()]='N/A'
first_wage_question = cwages[0]
first_wage_counts = df[first_wage_question].value_counts()
print(first_wage_counts)
In [3]:
# Horizontal bar chart of the answer counts for the second wage question.
second_wage_counts = df[cwages[1]].value_counts()
second_wage_counts.plot.barh()
plt.show()
In [4]:
# Bar plots for all wage questions: one horizontal bar chart per question,
# laid out as a single column of subplots in figure 0.
f0 = plt.figure(0)
n_wage_questions = len(cwages)
for row, question in enumerate(cwages, start=1):
    # subplot positions are 1-based, hence start=1 above
    plt.subplot(n_wage_questions, 1, row)
    df[question].value_counts().plot.barh()
    plt.title(question)
# Widen the vertical gap between subplots (presumably so the titles do not
# run into the neighbouring axes).
f0.subplots_adjust(hspace=1.2)
plt.show()
In [5]:
# Bar plots of the wage questions restricted to respondents from the humanities:
# one horizontal bar chart per question, as a single column of subplots in figure 1.
f1 = plt.figure(1)
# Rows whose division answer is exactly 'Humanities' (answers listing several
# divisions are not matched by this equality test).
is_humanities = df['Which division(s) have you done Unit 1 work in?']=='Humanities'
n_wage_questions = len(cwages)
for row, question in enumerate(cwages, start=1):
    plt.subplot(n_wage_questions, 1, row)
    # where() blanks the non-matching rows to NaN, and value_counts() ignores
    # NaN, so only humanities responses are tallied.
    humanities_answers = df[question].where(is_humanities)
    humanities_answers.value_counts().plot.barh()
    plt.title(question)
f1.subplots_adjust(hspace=1.2)
plt.show()
In [10]:
# Show how often each distinct value occurs in the 'Gender identity' column.
gender_identity = df['Gender identity']
gender_identity.value_counts()
Out[10]:
In [ ]:
In [55]:
# Collapse the 'Gender identity' answers into a coarse one-letter code stored in
# a new column 'G':
#   'M' for 'Man' / 'Cis, Man', 'W' for 'Woman' / 'Cis, Woman',
#   'N' for every other answer.
# The previous version assigned through chained indexing (df['G'][mask] = ...),
# which raises SettingWithCopyWarning and does not modify df at all when pandas
# copy-on-write is active. Building the column in one vectorised step avoids
# that and makes the cell idempotent.
gender_codes = {
    'Man': 'M',
    'Cis, Man': 'M',
    'Woman': 'W',
    'Cis, Woman': 'W',
}
# map() yields NaN for answers that are not in the table; those become 'N'.
df['G'] = df['Gender identity'].map(gender_codes).fillna('N')
print(df['G'].value_counts())
In [73]:
# Compare the answers to the first wage question across the gender codes in 'G'.
df2=df.groupby(df['G'])
# value_counts() on the grouped column returns a single Series indexed by
# (G, answer). Plotting that Series directly draws one separate bar per
# (G, answer) pair, so stacked=True has nothing to stack. unstack() pivots the
# answers into columns (filling absent combinations with 0), giving one bar per
# gender code with one stacked segment per answer.
wage_by_gender = df2[cwages[0]].value_counts().unstack(fill_value=0)
wage_by_gender.plot.bar(stacked=True)
# For side-by-side bars instead of stacked ones:
#wage_by_gender.plot(kind='bar', stacked=False)
plt.show()