In [10]:
import os
curr_dir = os.getcwd()
data_path = curr_dir + '/2015-wmpd-tables'
os.chdir(data_path)
In [11]:
import pandas as pd
df = pd.read_excel("tab4-2.xlsx",header=1)
total_df,us_citizen_df = df[1:26], df[28:53]
white_df = df[55:80]
asian_pacific_island_df = df[82:107]
black_df = df[109:134]
hispanic_df = df[136:161]
american_indian_df = df[163:188]
other_df = df[190:215]
In [12]:
def table_clean_up(df,field_name,race,degree_type):
df = df.rename(index=str,columns={'Field, citizenship, and race or ethnicity': field_name})
df = df[pd.notnull(df[field_name])]
df['Ethnicity'] = pd.Series(race,index=df.index)
df = pd.melt(df,id_vars=["Field","Ethnicity"],var_name="Year",value_name="Count")
df['Degree Type'] = pd.Series(degree_type,index=df.index)
return df
In [13]:
white_df = table_clean_up(white_df,"Field", "White","Associate")
asian_pacific_island_df =table_clean_up(asian_pacific_island_df,"Field","Asian/Pacific Islander","Associate")
black_df = table_clean_up(black_df,"Field", 'Black',"Associate")
hispanic_df = table_clean_up(hispanic_df,"Field","Hispanic","Associate")
american_indian_df = table_clean_up(american_indian_df,"Field", "Amerian Indian","Associate")
other_df = table_clean_up(other_df,"Field","Other","Associate")
In [14]:
frames = [white_df,asian_pacific_island_df,black_df,hispanic_df,american_indian_df,other_df]
associate_df = pd.concat(frames)
associate_df
Out[14]:
In [15]:
df = pd.read_excel("tab5-3.xlsx",header=1)
total_df,us_citizen_df = df[1:46], df[48:93]
white_df = df[95:140]
asian_pacific_island_df = df[142:187]
black_df = df[189:234]
hispanic_df = df[236:281]
american_indian_df = df[283:328]
other_df = df[330:375]
In [16]:
white_df = table_clean_up(white_df,"Field", "White","Bachelor")
asian_pacific_island_df =table_clean_up(asian_pacific_island_df,"Field","Asian/Pacific Islander","Bachelor")
black_df = table_clean_up(black_df,"Field", 'Black',"Bachelor")
hispanic_df = table_clean_up(hispanic_df,"Field","Hispanic","Bachelor")
american_indian_df = table_clean_up(american_indian_df,"Field", "Amerian Indian","Bachelor")
other_df = table_clean_up(other_df,"Field","Other","Bachelor")
In [17]:
frames = [white_df,asian_pacific_island_df,black_df,hispanic_df,american_indian_df,other_df]
bachelor_df = pd.concat(frames)
bachelor_df
Out[17]:
In [18]:
df = pd.read_excel("tab6-3.xlsx",header=1)
total_df,us_citizen_df = df[1:46], df[48:93]
white_df = df[95:140]
asian_pacific_island_df = df[142:187]
black_df = df[189:234]
hispanic_df = df[236:281]
american_indian_df = df[283:328]
other_df = df[330:375]
In [19]:
white_df = table_clean_up(white_df,"Field", "White","Masters")
asian_pacific_island_df =table_clean_up(asian_pacific_island_df,"Field","Asian/Pacific Islander","Masters")
black_df = table_clean_up(black_df,"Field", 'Black',"Masters")
hispanic_df = table_clean_up(hispanic_df,"Field","Hispanic","Masters")
american_indian_df = table_clean_up(american_indian_df,"Field", "Amerian Indian","Masters")
other_df = table_clean_up(other_df,"Field","Other","Masters")
In [20]:
frames = [white_df,asian_pacific_island_df,black_df,hispanic_df,american_indian_df,other_df]
masters_df = pd.concat(frames)
masters_df
Out[20]:
In [21]:
frames = [associate_df,bachelor_df,masters_df]
final_df = pd.concat(frames)
final_df.to_csv("degrees_awarded.csv")
final_df.to_excel("degrees_awarded.xlsx")