In [1]:
import pandas as pd
# pd.set_option('max_colwidth', 50)
# set this if you need to
In [46]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
The Health Department has developed an inspection report and scoring system. After conducting an inspection of the facility, the Health Inspector calculates a score based on the violations observed. Violations can fall into high-risk, moderate-risk, or low-risk categories.
In [3]:
# Load business records; dtype keeps phone numbers as strings so leading
# zeros / '+' prefixes are not mangled into floats.
# FIX: dropped parse_dates=True — with no index_col set it only attempts to
# parse the (default integer) index and is a no-op here.
businesses = pd.read_csv('./data/businesses_plus.csv', dtype={'phone_number': str})
businesses.head()
# dtype casts the column as a specific data type
Out[3]:
In [52]:
# Load inspection records, converting the 'date' column to datetime on read.
inspections = pd.read_csv('./data/inspections_plus.csv', parse_dates=['date'])
inspections.head()
Out[52]:
In [53]:
inspections.info()
In [5]:
# Load violation records.
# FIX: parse_dates=True only tries to parse the *index*, leaving the 'date'
# column as strings. The merge below joins inspections and violations on
# ['business_id', 'date'], so 'date' must be datetime64 on both sides —
# name the column explicitly.
violations = pd.read_csv('./data/violations_plus.csv', parse_dates=['date'])
violations.head()
Out[5]:
In [6]:
# 1 Combine the three dataframes into one data frame called restaurant_scores
# Hint: http://pandas.pydata.org/pandas-docs/stable/merging.html
# Inner-join inspections with violations on (business_id, date), then attach
# the business details on business_id.
insp_viol = inspections.merge(violations, on=['business_id', 'date'])
restaurant_scores = businesses.merge(insp_viol, on='business_id')
restaurant_scores.head()
Out[6]:
In [7]:
# 2 Which ten businesses have had the most inspections?
inspections['business_id'].value_counts().head(10)
Out[7]:
In [8]:
# 3 Group and count the inspections by type
inspections['type'].value_counts()
Out[8]:
In [55]:
# 4 Create a plot that shows number of inspections per month
# Bonus for creating a heatmap
# http://stanford.edu/~mwaskom/software/seaborn/generated/seaborn.heatmap.html?highlight=heatmap
# FIX: value_counts() orders months by frequency, so a line plot through them
# zig-zags meaninglessly. Reindex into calendar order and draw a labelled bar
# chart instead (months with no inspections show as 0).
month_order = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
               'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
monthly_counts = (inspections['date'].dt.strftime('%b')
                  .value_counts()
                  .reindex(month_order, fill_value=0))
monthly_counts.plot(kind='bar', title='Inspections per Month')
plt.xlabel('Month')
plt.ylabel('Inspections')
Out[55]:
In [10]:
# 5 Which zip code contains the most high risk violations?
high_risk = restaurant_scores[restaurant_scores['risk_category'] == 'High Risk']
high_risk['postal_code'].value_counts().head(1)
Out[10]:
In [11]:
# 6 If inspection is prompted by a change in restaurant ownership,
# is the inspection more likely to be categorized as higher or lower risk?
restaurant_scores.loc[restaurant_scores['type'] == 'New Ownership', 'risk_category'].value_counts()
Out[11]:
In [12]:
# 7 Examining the descriptions, what is the most common violation?
violations['description'].value_counts().head(1)
Out[12]:
In [56]:
# 8 Create a hist of the scores with 10 bins
inspections.Score.hist(bins=10)
Out[56]:
In [71]:
# 9 Can you predict risk category based on the other features in this dataset? Higher scores are lower risk
# Spot check: risk-category mix among inspections scoring above 75.
restaurant_scores.loc[restaurant_scores['Score'] > 75, 'risk_category'].value_counts()
Out[71]:
In [76]:
# 10 Extra Credit:
# Use Instagram location API to find pictures taken at the lat, long of the most High Risk restaurant
# https://www.instagram.com/developer/endpoints/locations/
restaurant_scores.query("risk_category == 'High Risk'").head()
Out[76]:
In [78]:
#https://api.instagram.com/v1/locations/search?lat=37.794483&lng=-122.396584&access_token=<REDACTED>
# NOTE(review): never commit real access tokens — the original token was
# hardcoded here; load it from an environment variable instead.
In [16]:
############################
### A Little More Morbid ###
############################
In [17]:
# Load the police-killings dataset (presumably FiveThirtyEight's 2015 data —
# TODO confirm provenance/version).
killings = pd.read_csv('./data/police-killings.csv')
killings.head()
Out[17]:
In [18]:
# 1. Make the following changes to column names:
# lawenforcementagency -> agency
# raceethnicity -> race
# FIX: the key was 'Lawenforcementagency' (capital L); DataFrame.rename
# silently ignores keys that match no column, so that rename never happened.
# Also assign instead of inplace=True for clearer data lineage.
killings = killings.rename(columns={'lawenforcementagency': 'agency',
                                    'raceethnicity': 'race'})
In [19]:
# 2. Show the count of missing values in each column
killings.isna().sum()
Out[19]:
In [20]:
# 3. replace each null value in the dataframe with the string "Unknown"
killings = killings.fillna('Unknown')
In [21]:
# 4. How many killings were there so far in 2015?
killings.loc[killings['year'] == 2015, 'name'].count()
Out[21]:
In [22]:
# 5. Of all killings, how many were male and how many female?
killings['gender'].value_counts()
Out[22]:
In [23]:
# 6. How many killings were of unarmed people?
killings.loc[killings['armed'] == 'No', 'name'].count()
Out[23]:
In [24]:
# 7. What percentage of all killings were unarmed?
n_unarmed = killings.loc[killings['armed'] == 'No', 'name'].count()
n_total = killings['name'].count()
float(n_unarmed) / float(n_total) * 100
Out[24]:
In [25]:
# 8. What are the 5 states with the most killings?
killings['state'].value_counts().head(5)
Out[25]:
In [26]:
# 9. Show a value counts of deaths for each race
killings['race'].value_counts()
Out[26]:
In [57]:
# 10. Display a histogram of ages of all killings
killings.age.hist(bins=10)
Out[57]:
In [58]:
# 11. Show 6 histograms of ages by race
killings['age'].hist(by=killings['race'])
Out[58]:
In [29]:
# 12. What is the average age of death by race?
killings.groupby('race')['age'].mean()
Out[29]:
In [59]:
# 13. Show a bar chart with counts of deaths every month
ax = killings['month'].value_counts().plot(kind='bar', title='Deaths per Month')
ax.set_xlabel('Month')
ax.set_ylabel('Deaths')
Out[59]:
In [31]:
###################
### Less Morbid ###
###################
In [32]:
# Load the college-majors dataset; cells below use columns Major,
# Major_category, Total, Employed, Unemployed, Median.
majors = pd.read_csv('./data/college-majors.csv')
majors.head()
Out[32]:
In [33]:
# 1. Delete the columns (employed_full_time_year_round, major_code)
# FIX: drop() returns a new frame; the original result was displayed and then
# discarded, so the columns were never actually removed. Assign it back.
# (Indices 6 and 1 are employed_full_time_year_round and major_code — TODO
# confirm against the file header.)
majors = majors.drop(majors.columns[[6, 1]], axis=1)
majors.head()
Out[33]:
In [34]:
# 2. Show the count of missing values in each column
majors.isna().sum()
Out[34]:
In [35]:
# 3. What are the top 10 highest paying majors?
# FIX: head(10) on the unsorted groupby result returns the first 10 majors in
# group order, not the highest paid — sort descending by median salary first.
majors.groupby('Major')['Median'].max().sort_values(ascending=False).head(10)
Out[35]:
In [36]:
# 4. Plot the data from the last question in a bar chart, include proper title, and labels!
In [37]:
# 5. What is the average median salary for each major category?
# FIX: the question asks per *category*; grouping by 'Major' gave per-major
# values (one row per major), not category averages.
majors.groupby('Major_category')['Median'].mean()
Out[37]:
In [38]:
# 6. Show only the top 5 paying major categories
# FIX: grouped by 'Major' and took the first 5 unsorted rows; group by
# 'Major_category' and sort descending to get the actual top 5.
majors.groupby('Major_category')['Median'].mean().sort_values(ascending=False).head(5)
Out[38]:
In [60]:
# 7. Plot a histogram of the distribution of median salaries
majors.Median.hist(bins=10)
Out[60]:
In [61]:
# 8. Plot a histogram of the distribution of median salaries by major category
majors['Median'].hist(by=majors['Major_category'])
Out[61]:
In [41]:
# 9. What are the top 10 most UNemployed majors?
# FIX: sort_index(by=...) was deprecated and removed from pandas;
# sort_values is the supported API for sorting on a column.
majors[['Major', 'Unemployed']].sort_values(by='Unemployed', ascending=False).head(10)
Out[41]:
In [42]:
# What are the unemployment rates?
# Rate = unemployed / labor force (employed + unemployed), as a percentage.
majors['Unemployment_rate'] = (majors.Unemployed / (majors.Unemployed + majors.Employed) * 100)
# FIX: sort_index(by=...) was removed from pandas; use sort_values.
majors[['Major', 'Unemployment_rate']].sort_values(by='Unemployment_rate', ascending=False)
Out[42]:
In [43]:
# 10. What are the top 10 most UNemployed majors CATEGORIES? Use the mean for each category
# What are the unemployment rates?
# FIX: the unsorted, un-truncated means answer "rate per category", not
# "top 10" — sort descending and take the first 10.
# (Relies on the Unemployment_rate column created in the cell above.)
majors.groupby('Major_category')['Unemployment_rate'].mean().sort_values(ascending=False).head(10)
Out[43]:
In [44]:
# 11. the total and employed column refer to the people that were surveyed.
# Create a new column showing the employment rate of the people surveyed for each major
# call it "sample_employment_rate"
# Example the first row has total: 128148 and employed: 90245. it's
# sample_employment_rate should be 90245.0 / 128148.0 = .7042
majors = majors.assign(sample_employment_rate=majors['Employed'] / majors['Total'])
majors.head()
Out[44]:
In [45]:
# 12. Create a "sample_unemployment_rate" column
# this column should be 1 - "sample_employment_rate"
majors = majors.assign(sample_unemployment_rate=1 - majors['sample_employment_rate'])
majors.head()
Out[45]:
In [ ]: