In [10]:
import os

curr_dir = os.getcwd()
data_path = curr_dir + '/2015-wmpd-tables'
os.chdir(data_path)


---------------------------------------------------------------------------
WindowsError                              Traceback (most recent call last)
<ipython-input-10-0e201301d400> in <module>()
      3 curr_dir = os.getcwd()
      4 data_path = curr_dir + '/2015-wmpd-tables'
----> 5 os.chdir(data_path)

WindowsError: [Error 2] The system cannot find the file specified: 'C:\\Users\\ransf\\Documents\\Udacity\\DataVisualization\\2015-wmpd-tables/2015-wmpd-tables'

In [11]:
import pandas as pd

df = pd.read_excel("tab4-2.xlsx",header=1)
total_df,us_citizen_df = df[1:26], df[28:53]
white_df = df[55:80]

asian_pacific_island_df = df[82:107]

black_df = df[109:134]
hispanic_df = df[136:161]

american_indian_df = df[163:188]
other_df = df[190:215]

In [12]:
def table_clean_up(df,field_name,race,degree_type):
    df = df.rename(index=str,columns={'Field, citizenship, and race or ethnicity': field_name})
    df = df[pd.notnull(df[field_name])]
    df['Ethnicity'] = pd.Series(race,index=df.index)
    df = pd.melt(df,id_vars=["Field","Ethnicity"],var_name="Year",value_name="Count")
    df['Degree Type'] = pd.Series(degree_type,index=df.index)
    return df

In [13]:
white_df = table_clean_up(white_df,"Field", "White","Associate")
asian_pacific_island_df =table_clean_up(asian_pacific_island_df,"Field","Asian/Pacific Islander","Associate")
black_df = table_clean_up(black_df,"Field", 'Black',"Associate")
hispanic_df = table_clean_up(hispanic_df,"Field","Hispanic","Associate")
american_indian_df = table_clean_up(american_indian_df,"Field", "Amerian Indian","Associate")
other_df = table_clean_up(other_df,"Field","Other","Associate")

In [14]:
frames = [white_df,asian_pacific_island_df,black_df,hispanic_df,american_indian_df,other_df]

associate_df = pd.concat(frames)
associate_df


Out[14]:
Field Ethnicity Year Count Degree Type
0 All fields White 2002 401199 Associate
1 S&E White 2002 31883 Associate
2 Science White 2002 30618 Associate
3 Agricultural sciences White 2002 1725 Associate
4 Biological sciences White 2002 1047 Associate
5 Computer sciences White 2002 21933 Associate
6 Earth, atmospheric, and ocean sciences White 2002 54 Associate
7 Mathematics and statistics White 2002 384 Associate
8 Physical sciences White 2002 891 Associate
9 Psychology White 2002 1121 Associate
10 Social sciences White 2002 3463 Associate
11 Engineering White 2002 1265 Associate
12 Non-S&E White 2002 369316 Associate
13 S&E technologies White 2002 52956 Associate
14 Science technologies White 2002 728 Associate
15 Engineering technologies White 2002 25386 Associate
16 Health technologies White 2002 26810 Associate
17 Other S&E technologies White 2002 32 Associate
18 Other non-S&Ea White 2002 316360 Associate
19 All fields White 2003 417683 Associate
20 S&E White 2003 37939 Associate
21 Science White 2003 36552 Associate
22 Agricultural sciences White 2003 1795 Associate
23 Biological sciences White 2003 1132 Associate
24 Computer sciences White 2003 28012 Associate
25 Earth, atmospheric, and ocean sciences White 2003 48 Associate
26 Mathematics and statistics White 2003 374 Associate
27 Physical sciences White 2003 688 Associate
28 Psychology White 2003 1121 Associate
29 Social sciences White 2003 3382 Associate
... ... ... ... ... ...
179 Physical sciences Other 2011 259 Associate
180 Psychology Other 2011 276 Associate
181 Social sciences Other 2011 2869 Associate
182 Engineering Other 2011 159 Associate
183 Non-S&E Other 2011 69617 Associate
184 S&E technologies Other 2011 13043 Associate
185 Science technologies Other 2011 92 Associate
186 Engineering technologies Other 2011 2828 Associate
187 Health technologies Other 2011 10071 Associate
188 Other S&E technologies Other 2011 52 Associate
189 Other non-S&Ea Other 2011 56574 Associate
190 All fields Other 2012 86843 Associate
191 S&E Other 2012 10945 Associate
192 Science Other 2012 10728 Associate
193 Agricultural sciences Other 2012 125 Associate
194 Biological sciences Other 2012 274 Associate
195 Computer sciences Other 2012 6097 Associate
196 Earth, atmospheric, and ocean sciences Other 2012 4 Associate
197 Mathematics and statistics Other 2012 95 Associate
198 Physical sciences Other 2012 267 Associate
199 Psychology Other 2012 372 Associate
200 Social sciences Other 2012 3494 Associate
201 Engineering Other 2012 217 Associate
202 Non-S&E Other 2012 75898 Associate
203 S&E technologies Other 2012 16436 Associate
204 Science technologies Other 2012 102 Associate
205 Engineering technologies Other 2012 5061 Associate
206 Health technologies Other 2012 11227 Associate
207 Other S&E technologies Other 2012 46 Associate
208 Other non-S&Ea Other 2012 59462 Associate

1254 rows × 5 columns


In [15]:
df = pd.read_excel("tab5-3.xlsx",header=1)
total_df,us_citizen_df = df[1:46], df[48:93]
white_df = df[95:140]

asian_pacific_island_df = df[142:187]

black_df = df[189:234]
hispanic_df = df[236:281]

american_indian_df = df[283:328]
other_df = df[330:375]

In [16]:
white_df = table_clean_up(white_df,"Field", "White","Bachelor")
asian_pacific_island_df =table_clean_up(asian_pacific_island_df,"Field","Asian/Pacific Islander","Bachelor")
black_df = table_clean_up(black_df,"Field", 'Black',"Bachelor")
hispanic_df = table_clean_up(hispanic_df,"Field","Hispanic","Bachelor")
american_indian_df = table_clean_up(american_indian_df,"Field", "Amerian Indian","Bachelor")
other_df = table_clean_up(other_df,"Field","Other","Bachelor")

In [17]:
frames = [white_df,asian_pacific_island_df,black_df,hispanic_df,american_indian_df,other_df]

bachelor_df = pd.concat(frames)
bachelor_df


Out[17]:
Field Ethnicity Year Count Degree Type
0 All fields White 2002 914692 Bachelor
1 S&E White 2002 276403 Bachelor
2 Science White 2002 236710 Bachelor
3 Agricultural sciences White 2002 14728 Bachelor
4 Biological sciences White 2002 41212 Bachelor
5 Computer sciences White 2002 27696 Bachelor
6 Earth, atmospheric, and ocean sciences White 2002 3450 Bachelor
7 Atmospheric sciences White 2002 467 Bachelor
8 Earth sciences White 2002 2806 Bachelor
9 Ocean sciences White 2002 177 Bachelor
10 Mathematics and statistics White 2002 8738 Bachelor
11 Physical sciences White 2002 9687 Bachelor
12 Astronomy White 2002 204 Bachelor
13 Chemistry White 2002 6260 Bachelor
14 Physics White 2002 2736 Bachelor
15 Other White 2002 487 Bachelor
16 Psychology White 2002 53318 Bachelor
17 Social sciences White 2002 77881 Bachelor
18 Anthropology White 2002 5380 Bachelor
19 Area and ethnic studies White 2002 3069 Bachelor
20 Economics White 2002 13371 Bachelor
21 History of science White 2002 60 Bachelor
22 Linguistics White 2002 571 Bachelor
23 Political science and public\n administration White 2002 26150 Bachelor
24 Sociology White 2002 15817 Bachelor
25 Other White 2002 13463 Bachelor
26 Engineering White 2002 39693 Bachelor
27 Aerospace engineering White 2002 1279 Bachelor
28 Chemical engineering White 2002 3976 Bachelor
29 Civil engineering White 2002 6624 Bachelor
... ... ... ... ... ...
366 Earth, atmospheric, and ocean sciences Other 2012 354 Bachelor
367 Atmospheric sciences Other 2012 29 Bachelor
368 Earth sciences Other 2012 313 Bachelor
369 Ocean sciences Other 2012 12 Bachelor
370 Mathematics and statistics Other 2012 1172 Bachelor
371 Physical sciences Other 2012 1425 Bachelor
372 Astronomy Other 2012 33 Bachelor
373 Chemistry Other 2012 845 Bachelor
374 Physics Other 2012 453 Bachelor
375 Other Other 2012 94 Bachelor
376 Psychology Other 2012 8191 Bachelor
377 Social sciences Other 2012 15693 Bachelor
378 Anthropology Other 2012 1064 Bachelor
379 Area and ethnic studies Other 2012 769 Bachelor
380 Economics Other 2012 2196 Bachelor
381 History of science Other 2012 11 Bachelor
382 Linguistics Other 2012 205 Bachelor
383 Political science and public\n administration Other 2012 5816 Bachelor
384 Sociology Other 2012 2150 Bachelor
385 Other Other 2012 3482 Bachelor
386 Engineering Other 2012 4637 Bachelor
387 Aerospace engineering Other 2012 182 Bachelor
388 Chemical engineering Other 2012 413 Bachelor
389 Civil engineering Other 2012 774 Bachelor
390 Electrical engineering Other 2012 983 Bachelor
391 Industrial engineering Other 2012 207 Bachelor
392 Materials engineering Other 2012 104 Bachelor
393 Mechanical engineering Other 2012 1224 Bachelor
394 Other Other 2012 750 Bachelor
395 Non-S&E Other 2012 94937 Bachelor

2376 rows × 5 columns


In [18]:
df = pd.read_excel("tab6-3.xlsx",header=1)
total_df,us_citizen_df = df[1:46], df[48:93]
white_df = df[95:140]

asian_pacific_island_df = df[142:187]

black_df = df[189:234]
hispanic_df = df[236:281]

american_indian_df = df[283:328]
other_df = df[330:375]

In [19]:
white_df = table_clean_up(white_df,"Field", "White","Masters")
asian_pacific_island_df =table_clean_up(asian_pacific_island_df,"Field","Asian/Pacific Islander","Masters")
black_df = table_clean_up(black_df,"Field", 'Black',"Masters")
hispanic_df = table_clean_up(hispanic_df,"Field","Hispanic","Masters")
american_indian_df = table_clean_up(american_indian_df,"Field", "Amerian Indian","Masters")
other_df = table_clean_up(other_df,"Field","Other","Masters")

In [20]:
frames = [white_df,asian_pacific_island_df,black_df,hispanic_df,american_indian_df,other_df]

masters_df = pd.concat(frames)
masters_df


Out[20]:
Field Ethnicity Year Count Degree Type
0 All fields White 2002 299441 Masters
1 S&E White 2002 48583 Masters
2 Science White 2002 38246 Masters
3 Agricultural sciences White 2002 2951 Masters
4 Biological sciences White 2002 4088 Masters
5 Computer sciences White 2002 4828 Masters
6 Earth, atmospheric, and ocean sciences White 2002 1065 Masters
7 Atmospheric sciences White 2002 98 Masters
8 Earth sciences White 2002 890 Masters
9 Ocean sciences White 2002 77 Masters
10 Mathematics and statistics White 2002 1555 Masters
11 Physical sciences White 2002 1783 Masters
12 Astronomy White 2002 59 Masters
13 Chemistry White 2002 877 Masters
14 Physics White 2002 613 Masters
15 Other White 2002 234 Masters
16 Psychology White 2002 9452 Masters
17 Social sciences White 2002 12524 Masters
18 Anthropology White 2002 705 Masters
19 Area and ethnic studies White 2002 763 Masters
20 Economics White 2002 827 Masters
21 History of science White 2002 23 Masters
22 Linguistics White 2002 270 Masters
23 Political science and public administration White 2002 7066 Masters
24 Sociology White 2002 1183 Masters
25 Other White 2002 1687 Masters
26 Engineering White 2002 10337 Masters
27 Aerospace engineering White 2002 310 Masters
28 Chemical engineering White 2002 435 Masters
29 Civil engineering White 2002 1930 Masters
... ... ... ... ... ...
366 Earth, atmospheric, and ocean sciences Other 2012 139 Masters
367 Atmospheric sciences Other 2012 11 Masters
368 Earth sciences Other 2012 117 Masters
369 Ocean sciences Other 2012 11 Masters
370 Mathematics and statistics Other 2012 419 Masters
371 Physical sciences Other 2012 263 Masters
372 Astronomy Other 2012 10 Masters
373 Chemistry Other 2012 126 Masters
374 Physics Other 2012 113 Masters
375 Other Other 2012 14 Masters
376 Psychology Other 2012 3054 Masters
377 Social sciences Other 2012 4765 Masters
378 Anthropology Other 2012 141 Masters
379 Area and ethnic studies Other 2012 143 Masters
380 Economics Other 2012 358 Masters
381 History of science Other 2012 3 Masters
382 Linguistics Other 2012 62 Masters
383 Political science and public administration Other 2012 3419 Masters
384 Sociology Other 2012 157 Masters
385 Other Other 2012 482 Masters
386 Engineering Other 2012 2414 Masters
387 Aerospace engineering Other 2012 59 Masters
388 Chemical engineering Other 2012 81 Masters
389 Civil engineering Other 2012 364 Masters
390 Electrical engineering Other 2012 501 Masters
391 Industrial engineering Other 2012 333 Masters
392 Materials engineering Other 2012 49 Masters
393 Mechanical engineering Other 2012 293 Masters
394 Other Other 2012 734 Masters
395 Non-S&E Other 2012 65909 Masters

2376 rows × 5 columns


In [21]:
frames = [associate_df,bachelor_df,masters_df]

final_df = pd.concat(frames)

final_df.to_csv("degrees_awarded.csv")
final_df.to_excel("degrees_awarded.xlsx")