In [44]:
import pandas as pd
import numpy as np
import sklearn
import subprocess
import warnings
pd.set_option('display.max_columns', None)

In [45]:
source_path = "/home/dolounet/dev/workshops/"

In [46]:
# coding: ISO-8859-1

Import data


In [47]:
raw_dataset = pd.read_csv(source_path + "Speed_Dating_Data.csv", encoding="ISO-8859-1")

Data exploration

Shape, types, distributions, categories, and potential missing values


In [48]:
raw_dataset.head(2)


Out[48]:
iid id gender idg condtn wave round position positin1 order partner pid match int_corr samerace age_o race_o pf_o_att pf_o_sin pf_o_int pf_o_fun pf_o_amb pf_o_sha dec_o attr_o sinc_o intel_o fun_o amb_o shar_o like_o prob_o met_o age field field_cd undergra mn_sat tuition race imprace imprelig from zipcode income goal date go_out career career_c sports tvsports exercise dining museums art hiking gaming clubbing reading tv theater movies concerts music shopping yoga exphappy expnum attr1_1 sinc1_1 intel1_1 fun1_1 amb1_1 shar1_1 attr4_1 sinc4_1 intel4_1 fun4_1 amb4_1 shar4_1 attr2_1 sinc2_1 intel2_1 fun2_1 amb2_1 shar2_1 attr3_1 sinc3_1 fun3_1 intel3_1 amb3_1 attr5_1 sinc5_1 intel5_1 fun5_1 amb5_1 dec attr sinc intel fun amb shar like prob met match_es attr1_s sinc1_s intel1_s fun1_s amb1_s shar1_s attr3_s sinc3_s intel3_s fun3_s amb3_s satis_2 length numdat_2 attr7_2 sinc7_2 intel7_2 fun7_2 amb7_2 shar7_2 attr1_2 sinc1_2 intel1_2 fun1_2 amb1_2 shar1_2 attr4_2 sinc4_2 intel4_2 fun4_2 amb4_2 shar4_2 attr2_2 sinc2_2 intel2_2 fun2_2 amb2_2 shar2_2 attr3_2 sinc3_2 intel3_2 fun3_2 amb3_2 attr5_2 sinc5_2 intel5_2 fun5_2 amb5_2 you_call them_cal date_3 numdat_3 num_in_3 attr1_3 sinc1_3 intel1_3 fun1_3 amb1_3 shar1_3 attr7_3 sinc7_3 intel7_3 fun7_3 amb7_3 shar7_3 attr4_3 sinc4_3 intel4_3 fun4_3 amb4_3 shar4_3 attr2_3 sinc2_3 intel2_3 fun2_3 amb2_3 shar2_3 attr3_3 sinc3_3 intel3_3 fun3_3 amb3_3 attr5_3 sinc5_3 intel5_3 fun5_3 amb5_3
0 1 1.0 0 1 1 1 10 7 NaN 4 1 11.0 0 0.14 0 27.0 2.0 35.0 20.0 20.0 20.0 0.0 5.0 0 6.0 8.0 8.0 8.0 8.0 6.0 7.0 4.0 2.0 21.0 Law 1.0 NaN NaN NaN 4.0 2.0 4.0 Chicago 60,521 69,487.00 2.0 7.0 1.0 lawyer NaN 9.0 2.0 8.0 9.0 1.0 1.0 5.0 1.0 5.0 6.0 9.0 1.0 10.0 10.0 9.0 8.0 1.0 3.0 2.0 15.0 20.0 20.0 15.0 15.0 15.0 NaN NaN NaN NaN NaN NaN 35.0 20.0 15.0 20.0 5.0 5.0 6.0 8.0 8.0 8.0 7.0 NaN NaN NaN NaN NaN 1 6.0 9.0 7.0 7.0 6.0 5.0 7.0 6.0 2.0 4.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 6.0 2.0 1.0 NaN NaN NaN NaN NaN NaN 19.44 16.67 13.89 22.22 11.11 16.67 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 6.0 7.0 8.0 7.0 6.0 NaN NaN NaN NaN NaN 1.0 1.0 0.0 NaN NaN 15.0 20.0 20.0 15.0 15.0 15.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 5.0 7.0 7.0 7.0 7.0 NaN NaN NaN NaN NaN
1 1 1.0 0 1 1 1 10 7 NaN 3 2 12.0 0 0.54 0 22.0 2.0 60.0 0.0 0.0 40.0 0.0 0.0 0 7.0 8.0 10.0 7.0 7.0 5.0 8.0 4.0 2.0 21.0 Law 1.0 NaN NaN NaN 4.0 2.0 4.0 Chicago 60,521 69,487.00 2.0 7.0 1.0 lawyer NaN 9.0 2.0 8.0 9.0 1.0 1.0 5.0 1.0 5.0 6.0 9.0 1.0 10.0 10.0 9.0 8.0 1.0 3.0 2.0 15.0 20.0 20.0 15.0 15.0 15.0 NaN NaN NaN NaN NaN NaN 35.0 20.0 15.0 20.0 5.0 5.0 6.0 8.0 8.0 8.0 7.0 NaN NaN NaN NaN NaN 1 7.0 8.0 7.0 8.0 5.0 6.0 7.0 5.0 1.0 4.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 6.0 2.0 1.0 NaN NaN NaN NaN NaN NaN 19.44 16.67 13.89 22.22 11.11 16.67 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 6.0 7.0 8.0 7.0 6.0 NaN NaN NaN NaN NaN 1.0 1.0 0.0 NaN NaN 15.0 20.0 20.0 15.0 15.0 15.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 5.0 7.0 7.0 7.0 7.0 NaN NaN NaN NaN NaN

In [49]:
raw_dataset_copy = raw_dataset.copy()  # plain assignment would only alias the frame, not copy it

In [50]:
columns_by_types = raw_dataset.columns.to_series().groupby(raw_dataset.dtypes).groups

In [51]:
raw_dataset.dtypes.value_counts()


Out[51]:
float64    174
int64       13
object       8
dtype: int64

In [52]:
raw_dataset.isnull().sum().head(3)


Out[52]:
iid       0
id        1
gender    0
dtype: int64

In [53]:
summary = raw_dataset.describe() #.transpose()
#print (summary.head())

In [54]:
#raw_dataset.groupby("gender").agg({"iid": pd.Series.nunique})
raw_dataset.groupby('gender').iid.nunique()


Out[54]:
gender
0    274
1    277
Name: iid, dtype: int64

In [55]:
raw_dataset.groupby('career').iid.nunique().sort_values(ascending=False).head(5)


Out[55]:
career
Finance          13
professor        12
Lawyer           11
Professor        10
Social Worker     9
Name: iid, dtype: int64
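
The top careers mix capitalization ("professor" vs "Professor"), so these raw counts split what is likely the same category. A minimal sketch that normalizes case and whitespace before counting (career_clean is an illustrative name):

In [ ]:
# Normalize case/whitespace so "professor" and "Professor" are counted together
career_clean = raw_dataset["career"].str.strip().str.lower()
(raw_dataset.assign(career_clean=career_clean)
            .groupby("career_clean").iid.nunique()
            .sort_values(ascending=False).head(5))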

In [56]:
raw_dataset.groupby(["gender","match"]).iid.nunique()


Out[56]:
gender  match
0       0        274
        1        221
1       0        277
        1        231
Name: iid, dtype: int64

Data processing


In [58]:
from love_matcher.refactored.main_cut import MainClass, RawSetProcessing, Trainer, FeatureEngineering, TuneParameters
local_path = source_path
local_filename = "Speed_Dating_Data.csv"
main_class = MainClass(workspace=local_path)
raw_dataframe = main_class.read_dataframe()
my_variables_selection = ["iid", "pid", "match","gender","date","go_out","sports","tvsports","exercise","dining",
                          "museums","art","hiking","gaming","clubbing","reading","tv","theater","movies",
                          "concerts","music","shopping","yoga"]

In [59]:
raw_set = RawSetProcessing(features=my_variables_selection, dataframe=raw_dataframe)
dataset_df = raw_set.combiner_pipeline()

In [60]:
dataset_df.head(2)


Out[60]:
iid pid match gender date go_out sports tvsports exercise dining museums art hiking gaming clubbing reading tv theater movies concerts music shopping yoga
0 1 11.0 0 0 7.0 1.0 9.0 2.0 8.0 9.0 1.0 1.0 5.0 1.0 5.0 6.0 9.0 1.0 10.0 10.0 9.0 8.0 1.0
1 1 12.0 0 0 7.0 1.0 9.0 2.0 8.0 9.0 1.0 1.0 5.0 1.0 5.0 6.0 9.0 1.0 10.0 10.0 9.0 8.0 1.0

In [61]:
# Number of unique participants
dataset_df.iid.nunique()


Out[61]:
543

In [62]:
dataset_df.shape


Out[62]:
(8271, 23)
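
RawSetProcessing comes from the love_matcher package, so combiner_pipeline's internals are not shown here. Given that the result has exactly the 23 selected columns, a rough pandas equivalent might look as follows (the drop_duplicates/dropna steps are assumptions):

In [ ]:
# Hypothetical equivalent of RawSetProcessing.combiner_pipeline:
# keep only the selected variables, then drop duplicate and incomplete rows
dataset_sketch = (raw_dataframe[my_variables_selection]
                  .drop_duplicates()
                  .dropna())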

Feature engineering


In [63]:
suffix_me = "_me"
suffix_partner = "_partner"
my_label = "match_perc"

In [64]:
feat_eng = FeatureEngineering(suffix_me, suffix_partner, my_label)
feat_engineered_df = feat_eng.get_partner_features(dataset_df)

In [65]:
feat_engineered_df.head(2)


Out[65]:
iid_me pid match gender_me date_me go_out_me sports_me tvsports_me exercise_me dining_me museums_me art_me hiking_me gaming_me clubbing_me reading_me tv_me theater_me movies_me concerts_me music_me shopping_me yoga_me iid_partner gender_partner date_partner go_out_partner sports_partner tvsports_partner exercise_partner dining_partner museums_partner art_partner hiking_partner gaming_partner clubbing_partner reading_partner tv_partner theater_partner movies_partner concerts_partner music_partner shopping_partner yoga_partner
0 1 11.0 0 0 7.0 1.0 9.0 2.0 8.0 9.0 1.0 1.0 5.0 1.0 5.0 6.0 9.0 1.0 10.0 10.0 9.0 8.0 1.0 11 1 5.0 4.0 8.0 7.0 2.0 6.0 7.0 5.0 5.0 5.0 4.0 9.0 2.0 4.0 8.0 7.0 8.0 5.0 1.0
1 2 11.0 0 0 5.0 1.0 3.0 2.0 7.0 10.0 8.0 6.0 3.0 5.0 8.0 10.0 1.0 9.0 8.0 7.0 8.0 3.0 1.0 11 1 5.0 4.0 8.0 7.0 2.0 6.0 7.0 5.0 5.0 5.0 4.0 9.0 2.0 4.0 8.0 7.0 8.0 5.0 1.0
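
get_partner_features is likewise a love_matcher method. Judging from the columns above, it joins each participant's interest ratings with those of the partner identified by pid; a minimal self-merge sketch under that assumption:

In [ ]:
# Hypothetical equivalent: pair each row with its partner's ratings by
# self-merging on pid == iid, suffixing the two sides "_me" / "_partner"
me = dataset_df.add_suffix(suffix_me).rename(
    columns={"pid" + suffix_me: "pid", "match" + suffix_me: "match"})
partner = dataset_df.drop(["pid", "match"], axis=1).add_suffix(suffix_partner)
merged_sketch = me.merge(partner, left_on="pid", right_on="iid" + suffix_partner)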

In [66]:
feat_engineered_df.groupby("match").iid_me.count()


Out[66]:
match
0    6826
1    1348
Name: iid_me, dtype: int64

Modelling

This model aims to predict whether a pair matches, based on each person's interests (a majority-class baseline check follows the variable list below).

Variables:

  • gender
  • date: In general, how frequently do you go on dates?
  • go_out: How often do you go out (not necessarily on dates)?
  • sports: Playing sports/athletics
  • tvsports: Watching sports
  • exercise: Body building/exercising
  • dining: Dining out
  • museums: Museums/galleries
  • art: Art
  • hiking: Hiking/camping
  • gaming: Gaming
  • clubbing: Dancing/clubbing
  • reading: Reading
  • tv: Watching TV
  • theater: Theater
  • movies: Movies
  • concerts: Going to concerts
  • music: Music
  • shopping: Shopping
  • yoga: Yoga/meditation
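
Out[66] above shows 1,348 matched rows against 6,826 unmatched, so any classifier must beat the majority-class baseline of roughly 0.835 accuracy. A quick check:

In [ ]:
# Class shares; always predicting "no match" already yields ~0.835 accuracy
feat_engineered_df["match"].value_counts(normalize=True)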

In [67]:
import sklearn
print(sklearn.__version__)


0.18.1

In [68]:
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
import subprocess

Variables selection


In [69]:
features = ["iid", "gender", "date", "go_out", "sports", "tvsports", "exercise", "dining", "museums", "art",
            "hiking", "gaming", "clubbing", "reading", "tv", "theater", "movies", "concerts", "music",
            "shopping", "yoga"]
label = "match"

In [70]:
# Add a suffix to each feature name for both sides of the pair (me / partner)
def process_features_names(features, suffix_1, suffix_2):
    features_me = [feat + suffix_1 for feat in features]
    features_partner = [feat + suffix_2 for feat in features]
    features_all = features_me + features_partner
    return features_all

features_model = process_features_names(features, suffix_me, suffix_partner)

In [71]:
explanatory = feat_engineered_df[features_model]
explained = feat_engineered_df[label]

In [72]:
explanatory[explanatory["iid_me"] == 1].head(5)


Out[72]:
iid_me gender_me date_me go_out_me sports_me tvsports_me exercise_me dining_me museums_me art_me hiking_me gaming_me clubbing_me reading_me tv_me theater_me movies_me concerts_me music_me shopping_me yoga_me iid_partner gender_partner date_partner go_out_partner sports_partner tvsports_partner exercise_partner dining_partner museums_partner art_partner hiking_partner gaming_partner clubbing_partner reading_partner tv_partner theater_partner movies_partner concerts_partner music_partner shopping_partner yoga_partner
0 1 0 7.0 1.0 9.0 2.0 8.0 9.0 1.0 1.0 5.0 1.0 5.0 6.0 9.0 1.0 10.0 10.0 9.0 8.0 1.0 11 1 5.0 4.0 8.0 7.0 2.0 6.0 7.0 5.0 5.0 5.0 4.0 9.0 2.0 4.0 8.0 7.0 8.0 5.0 1.0
10 1 0 7.0 1.0 9.0 2.0 8.0 9.0 1.0 1.0 5.0 1.0 5.0 6.0 9.0 1.0 10.0 10.0 9.0 8.0 1.0 12 1 1.0 1.0 9.0 7.0 9.0 8.0 7.0 6.0 3.0 3.0 5.0 6.0 6.0 4.0 7.0 7.0 9.0 5.0 5.0
20 1 0 7.0 1.0 9.0 2.0 8.0 9.0 1.0 1.0 5.0 1.0 5.0 6.0 9.0 1.0 10.0 10.0 9.0 8.0 1.0 13 1 7.0 1.0 7.0 8.0 2.0 9.0 5.0 6.0 4.0 7.0 7.0 6.0 8.0 10.0 8.0 9.0 9.0 8.0 1.0
30 1 0 7.0 1.0 9.0 2.0 8.0 9.0 1.0 1.0 5.0 1.0 5.0 6.0 9.0 1.0 10.0 10.0 9.0 8.0 1.0 14 1 4.0 1.0 10.0 6.0 8.0 8.0 3.0 3.0 10.0 8.0 8.0 6.0 7.0 3.0 10.0 6.0 8.0 6.0 1.0
40 1 0 7.0 1.0 9.0 2.0 8.0 9.0 1.0 1.0 5.0 1.0 5.0 6.0 9.0 1.0 10.0 10.0 9.0 8.0 1.0 15 1 4.0 1.0 9.0 7.0 9.0 7.0 4.0 3.0 6.0 7.0 9.0 8.0 6.0 9.0 9.0 6.0 7.0 2.0 1.0

Tuning


In [73]:
from sklearn import ensemble

In [74]:
warnings.filterwarnings("ignore")

In [75]:
# Parameters for Random Forest

parameters = [
  {'max_depth': [8,10,12,14,16,18], 
   'min_samples_split': [10,15,20,25,30], 
   'min_samples_leaf': [10,15,20,25,30]
  }
]
scores = ['precision', 'recall']
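
TuneParameters is part of the love_matcher package, so its internals are not shown. Assuming it essentially wraps the GridSearchCV imported earlier (the report below looks like standard GridSearchCV plus classification_report output, and the support of 4087 suggests a 50/50 train/test split), a direct sketch would be roughly:

In [ ]:
# Hypothetical equivalent of TuneParameters.combiner_pipeline for the
# 'precision' score (the wrapper presumably loops over both scores)
x_tr, x_te, y_tr, y_te = train_test_split(explanatory, explained,
                                          test_size=0.5, random_state=0)
grid = GridSearchCV(ensemble.RandomForestClassifier(n_estimators=5),
                    parameters, scoring='precision')
grid.fit(x_tr, y_tr)
print(grid.best_params_)
print(classification_report(y_te, grid.predict(x_te)))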

In [76]:
RFModel = ensemble.RandomForestClassifier(n_estimators=5, oob_score=False)

In [82]:
tune = TuneParameters(explanatory, explained, RFModel, parameters, scores)
best_parameters = tune.combiner_pipeline()
x_train, x_test, y_train, y_test  = tune.create_train_test_splits()


# Tuning hyper-parameters for precision

Best parameters set found on development set:

{'min_samples_leaf': 10, 'min_samples_split': 15, 'max_depth': 10}

             precision    recall  f1-score   support

          0       0.84      1.00      0.91      3413
          1       0.71      0.01      0.03       674

avg / total       0.82      0.84      0.77      4087


Training


In [83]:
estimator_RFC = ensemble.RandomForestClassifier()

In [84]:
best_parameters


Out[84]:
{'bootstrap': True,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': 10,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'min_impurity_split': 1e-07,
 'min_samples_leaf': 10,
 'min_samples_split': 15,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 5,
 'n_jobs': 1,
 'oob_score': False,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}

In [87]:
train = Trainer(x_train, y_train, x_test, y_test, best_parameters)
estimator, score_train, score_test = train.combiner_pipeline()
print(estimator, score_train, score_test)


RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=10, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=10,
            min_samples_split=15, min_weight_fraction_leaf=0.0,
            n_estimators=5, n_jobs=1, oob_score=False, random_state=None,
            verbose=0, warm_start=False) 0.842671886469 0.834352826034

Evaluate


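Two things stand out above: the test accuracy (0.834) is almost exactly the majority-class rate (6826 / 8174 ≈ 0.835), and the tuning report gave a recall of only 0.01 for class 1, i.e. the model predicts "no match" nearly everywhere. A minimal evaluation sketch for the fitted estimator on the held-out split from the cells above:

In [ ]:
from sklearn.metrics import confusion_matrix

# Per-class metrics and confusion matrix on the test set
y_pred = estimator.predict(x_test)
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))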