Data notes

Wave I, the main survey, was fielded between February 21 and April 2, 2009. Wave 2 was fielded March 12, 2010 to June 8, 2010. Wave 3 was fielded March 22, 2011 to August 29, 2011. Wave 4 was fielded between March and November of 2013. Wave 5 was fielded between November, 2014 and March, 2015.



In [1]:

    
import numpy as np
import pandas as pd

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

pd.options.display.max_columns=1000

Load raw data



In [28]:

    
df = pd.read_stata('/gh/data/hcmst/1.dta')
# df2 = pd.read_stata('/gh/data/hcmst/2.dta')
# df3 = pd.read_stata('/gh/data/hcmst/3.dta')
# df = df1.merge(df2, on='caseid_new')
# df = df.merge(df3, on='caseid_new')
df.head(2)









    Out[28]:







  
    
      
      caseid_new
      weight1
      weight2
      ppage
      ppagecat
      ppagect4
      ppeduc
      ppeducat
      ppethm
      ppgender
      pphhhead
      pphouseholdsize
      pphouse
      ppincimp
      hhinc
      ppmarit
      ppmsacat
      ppreg4
      ppreg9
      pprent
      ppt01
      ppt1317
      ppt18ov
      ppt25
      ppt612
      children_in_hh
      ppwork
      ppnet
      ppq14arace
      pphispan
      pprace_white
      pprace_black
      pprace_nativeamerican
      pprace_asianindian
      pprace_chinese
      pprace_filipino
      pprace_japanese
      pprace_korean
      pprace_vietnamese
      pprace_otherasian
      pprace_hawaiian
      pprace_guamanian
      pprace_samoan
      pprace_otherpacificislander
      pprace_someotherrace
      papglb_friend
      pppartyid3
      papevangelical
      papreligion
      ppppcmdate_yrmo
      pppadate_yrmo
      pphhcomp11_member2_age
      pphhcomp11_member3_age
      pphhcomp11_member4_age
      pphhcomp11_member5_age
      pphhcomp11_member6_age
      pphhcomp11_member7_age
      pphhcomp11_member8_age
      pphhcomp11_member9_age
      pphhcomp11_member10_age
      pphhcomp11_member11_age
      pphhcomp11_member12_age
      pphhcomp11_member13_age
      pphhcomp11_member14_age
      pphhcomp11_member15_age
      pphhcomp11_member2_gender
      pphhcomp11_member3_gender
      pphhcomp11_member4_gender
      pphhcomp11_member5_gender
      pphhcomp11_member6_gender
      pphhcomp11_member7_gender
      pphhcomp11_member8_gender
      pphhcomp11_member9_gender
      pphhcomp11_member10_gender
      pphhcomp11_member11_gender
      pphhcomp11_member12_gender
      pphhcomp11_member13_gender
      pphhcomp11_member14_gender
      pphhcomp11_member15_gender
      pphhcomp11_member2_relationship
      pphhcomp11_member3_relationship
      pphhcomp11_member4_relationship
      pphhcomp11_member5_relationship
      pphhcomp11_member6_relationship
      pphhcomp11_member7_relationship
      pphhcomp11_member8_relationship
      pphhcomp11_member9_relationship
      pphhcomp11_member10_relationship
      pphhcomp11_member11_relationship
      pphhcomp11_member12_relationship
      pphhcomp11_member13_relationship
      pphhcomp11_member14_relationship
      pphhcomp11_member15_relationship
      irb_consent
      weight3
      weight4
      weight5
      weight6
      weight7
      weight_couples_coresident
      HCMST_main_interview_yrmo
      duration
      qflag
      glbstatus
      papglb_status
      recsource
      s1
      s1a
      s2
      q3_codes
      q4
      q5
      q6a
      q6b
      q7a
      q7b
      q8a
      q8b
      q9
      q10
      q11
      q12
      q13a
      q13b
      q14
      q15a1_compressed
      q16
      q17a
      q17b
      q17c
      q17d
      gender_attraction
      q18a_1
      q18a_2
      q18a_3
      q18a_refused
      q18b_codes
      q18c_codes
      q19
      q20
      q21a
      q21a_refusal
      q21b
      q21b_refusal
      q21c
      q21c_refusal
      q21d
      q21d_refusal
      q21e
      q21e_refusal
      q22
      q23
      q24_codes
      q25
      q26
      q27
      q28
      q29
      q30
      q31_1
      q31_2
      q31_3
      q31_4
      q31_5
      q31_6
      q31_7
      q31_8
      q31_9
      q31_other_text_entered
      q32
      q33_1
      q33_2
      q33_3
      q33_4
      q33_5
      q33_6
      q33_7
      q33_other_text_entered
      q34
      q35_codes
      q35_text_entered
      q24_met_online
      summary_q24_total
      q24_R_cowork
      q24_R_friend
      q24_R_family
      q24_R_sig_other
      q24_R_neighbor
      q24_P_cowork
      q24_P_friend
      q24_P_family
      q24_P_sig_other
      q24_P_neighbor
      q24_btwn_I_cowork
      q24_btwn_I_friend
      q24_btwn_I_family
      q24_btwn_I_sig_other
      q24_btwn_I_neighbor
      q24_school
      q24_college
      q24_military
      q24_church
      q24_vol_org
      q24_customer
      q24_bar_restaurant
      q24_internet_dating
      q24_internet_social_networking
      q24_internet_game
      q24_internet_chat
      q24_internet_community
      q24_internet_other
      q24_public
      q24_private_party
      q24_blind_date
      q24_vacation
      q24_singles_service_non_internet
      q24_business_trip
      q24_work_neighbor
      q24_fam_sister_active
      q24_fam_brother_active
      q24_fam_mother_active
      q24_fam_father_active
      q24_fam_other_active
      q24_fam_cousins_active
      q24_fam_aunt_niece_active
      q24_fam_uncle_nephew_active
      q24_fam_grandmother_active
      q24_fam_grandfather_active
      q24_fam_sister_passive
      q24_fam_brother_passive
      q24_fam_mother_passive
      q24_fam_father_passive
      q24_fam_other_passive
      q24_fam_cousins_passive
      q24_fam_aunt_niece_passive
      q24_fam_uncle_nephew_passive
      q24_fam_grandmother_passive
      q24_fam_grandfather_passive
      q24_fam_female
      q24_fam_male
      distancemoved_10mi
      marrynotreally
      marrycountry
      civilnotreally
      partner_deceased
      partner_religion_reclassified
      partner_religion_child_reclass
      own_religion_child_reclass
      q32_internet
      how_met_online
      either_internet
      either_internet_adjusted
      same_sex_couple
      potential_partner_gender_recodes
      alt_partner_gender
      how_long_ago_first_met
      how_long_ago_first_romantic
      how_long_ago_first_cohab
      how_long_ago_first_met_cat
      how_long_relationship
      respondent_race
      partner_race
      age_difference
      met_through_friends
      met_through_family
      met_through_as_neighbors
      met_through_as_coworkers
      respondent_religion_at_16
      respondent_relig_16_cat
      partner_religion_at_16
      partner_relig_16_cat
      married
      parental_approval
      respondent_yrsed
      partner_yrsed
      home_country_recode
      US_raised
      partner_mom_yrsed
      respondent_mom_yrsed
      relationship_quality
      coresident
      pp2_afterp1
      pp2_pphhhead
      pp2_pphhsize
      pp2_pphouse
      pp2_ppincimp
      pp2_ppmarit
      pp2_ppmsacat
      pp2_ppeduc
      pp2_ppeducat
      pp2_respondent_yrsed
      pp2_ppethm
      pp2_ppreg4
      pp2_ppreg9
      pp2_pprent
      pp2_ppt01
      pp2_ppt1317
      pp2_ppt18ov
      pp2_ppt25
      pp2_ppt612
      pp2_ppwork
      pp2_ppnet
      pp2_ppcmdate_yrmo
      pp_igdr1
      pp_ieduc1
      pp2_igdr2
      pp2_ieduc2
      w2_deceased
      w2_multiname
      w2_panelstat
      w2_donotcontact
      w2_assigned
      w2_f1complete
      w2_HCMST_interview_fin_yrmo
      w2_duration
      w2_xmarry
      w2_xss
      w2_source
      w2_q1
      w2_q2
      w2_q3
      w2_q4
      w2_q5
      w2_q6
      w2_q7
      w2_q8
      w2_q9
      w2_q10
      w2_broke_up
      w2_days_elapsed
      pp3_pphhhead
      pp3_pphhsize
      pp3_pphouse
      pp3_ppincimp
      pp3_ppmarit
      pp3_ppmsacat
      pp3_pprent
      pp3_ppreg4
      pp3_ppreg9
      interstate_mover_pp1_pp2
      interstate_mover_pp2_pp3
      interstate_mover_pp1_pp3
      pp3_ppt01
      pp3_ppt1317
      pp3_ppt18ov
      pp3_ppt25
      pp3_ppt612
      pp3_ppwork
      pp3_ppnet
      pp3_ppcmdate_yrmo
      pp3_ppeduc
      pp3_ppeducat
      pp3_respondent_yrsed
      pp3_ppethm
      pp3_newer
      w2w3_combo_breakup
      w3_broke_up
      w3_xpartnered
      w3_xdeceased
      w3_multiname
      w3_xss
      w3_xlast
      w3_xyear
      w3_xmonth
      w3_xqualified
      w3_status
      w3_complete
      w3_source
      w3_HCMST_interview_fin_yrmo
      w3_days_elapsed
      w3_duration
      w3_xmarry
      w3_xtype
      w3_q1
      w3_q2
      w3_q3
      w3_q4
      w3_mbtiming_year
      w3_mbtiming_month
      w3_q5
      w3_q6
      w3_q7
      w3_q8
      w3_q9
      w3_q10
      w3_nonmbtiming_year
      w3_nonmbtiming_month
    
  
  
    
      0
      22526
      4265
      4265.0
      52
      45-54
      45-59
      bachelors degree
      bachelor's degree or higher
      hispanic
      female
      yes
      2
      a building with 2 or more apartments
      $20,000 to $24,999
      22250.0
      living with partner
      metro
      midwest
      east-north central
      rented for cash
      0
      0
      2
      0
      0
      0
      working - as a paid employee
      yes
      not asked
      yes, other spanish/hispanic/latino
      yes
      no
      no
      no
      no
      no
      no
      no
      no
      no
      no
      no
      no
      no
      no
      yes, friends
      democrat
      yes
      catholic
      200711.0
      200709.0
      47.0
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      female
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      unmarried partner
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      yes, i agree to participate
      4265.0
      5618
      7505.0
      3576.0
      3576.0
      634.354187
      200902.0
      20
      partnered
      glb
      yes
      glb augment sample
      no, i am not married
      NaN
      yes, i have a sexual partner (boyfriend or gir...
      NaN
      female
      yes, we are a same-sex couple
      no (not latino or hispanic)
      white
      no
      protestant (e.g. methodist, lutheran, presbyte...
      yes, the same
      NaN
      48.0
      associate degree
      hs graduate or ged
      democrat
      no, i have changed religions
      protestant (e.g. methodist, lutheran, presbyte...
      bachelor's degree
      United States
      0.0
      NaN
      once
      i am mostly sexually attracted to women, less ...
      NaN
      same gender mostly
      no
      no
      have neither DP nor CU
      no
      NaN
      NaN
      yes
      NaN
      45.0
      NaN
      45.0
      NaN
      45.0
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      partner earned more
      NaN
      different high school
      did not attend same college or university
      no
      no
      neither father nor mother are alive
      NaN
      no
      no
      no
      yes
      no
      no
      no
      no
      no
      No
      yes, a different kind of internet service
      refused
      refused
      refused
      refused
      refused
      refused
      refused
      No
      good
      NaN
      Yes
      met online
      2.0
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      Yes
      No
      No
      No
      No
      No
      Yes
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      10.0
      NaN
      
      NaN
      not deceased
      NaN
      NaN
      NaN
      1.0
      Previously Strangers: Before online connection...
      Yes
      met online
      same-sex couple
      NaN
      female
      7.0
      7.0
      7.0
      6-10
      7.0
      Hispanic
      NH white
      4.0
      not met through friends
      not met through family
      did not meet through or as neighbors
      0.0
      protestant (e.g. methodist, lutheran, presbyte...
      Protestant or oth Christian
      protestant (e.g. methodist, lutheran, presbyte...
      Protestant or oth Christian
      not married
      NaN
      16.0
      14.0
      NaN
      raised in US
      12.0
      16.0
      good
      Yes
      Yes second background survey
      yes
      2.0
      a building with 2 or more apartments
      $20,000 to $24,999
      living with partner
      metro
      bachelors degree
      bachelor's degree or higher
      16.0
      hispanic
      midwest
      east-north central
      rented for cash
      0.0
      0.0
      2.0
      0.0
      0.0
      not working - looking for work
      yes
      200905.0
      value not imputed
      value not imputed
      value not imputed
      value not imputed
      not deceased
      NaN
      withdrawn kn panelist
      all other cases
      assigned to survey
      completed followup survey
      201003.0
      6.0
      partnered
      yes, qualified to ask about new domestic parte...
      online
      NaN
      NaN
      NaN
      NaN
      yes
      yes
      no, did not marry [partner]
      no, we have not gotten a domestic partnership ...
      NaN
      NaN
      still together
      390.0
      Yes
      2.0
      A building with 2 or more apartments
      $15,000 to $19,999
      Living with partner
      Metro
      Rented for cash
      Midwest
      East-North Central
      stayer
      stayer
      stayer
      0.0
      0.0
      2.0
      0.0
      0.0
      Not working - looking for work
      Yes
      201107.0
      Bachelors degree
      Bachelor's degree or higher
      16.0
      Hispanic
      Yes, pp3 data is newer and available
      still together, or lost to follow-up, or partn...
      still together
      Qualified for follow-up at wave3
      not deceased
      NaN
      yes
      1 year ago
      2010.0
      3.0
      qualified for wave 3
      active member of KN panel
      yes
      Online
      201104.0
      774.0
      2.0
      Partnered
      same sex couple
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      yes
      yes
      no, did not marry [xNameP]
      No, we have not gotten a domestic partnership ...
      NaN
      NaN
      NaN
      NaN
    
    
      1
      23286
      16485
      16485.0
      28
      25-34
      18-29
      masters degree
      bachelor's degree or higher
      white, non-hispanic
      female
      yes
      2
      a building with 2 or more apartments
      $40,000 to $49,999
      45000.0
      living with partner
      metro
      west
      pacific
      rented for cash
      0
      0
      2
      0
      0
      0
      working - as a paid employee
      yes
      not asked
      no, i am not
      yes
      no
      no
      no
      no
      no
      no
      no
      no
      no
      no
      no
      no
      no
      no
      yes, both
      democrat
      no
      jewish
      200711.0
      200709.0
      29.0
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      female
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      spouse
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      yes, i agree to participate
      16485.0
      8383
      11198.0
      10241.0
      10241.0
      9589.906250
      200902.0
      13
      partnered
      glb
      yes
      glb augment sample
      yes, i am married
      NaN
      NaN
      NaN
      female
      yes, we are a same-sex couple
      no (not latino or hispanic)
      white
      no
      none
      no, has changed religions
      other christian
      30.0
      bachelor's degree
      bachelor's degree
      democrat
      no, i have changed religions
      jewish
      professional or doctorate degree
      United States
      0.0
      once (this is my first marriage)
      NaN
      i am equally sexually attracted to men and women
      NaN
      both genders equally
      yes
      no
      have either DP or CU
      no
      NaN
      NaN
      yes
      NaN
      19.0
      NaN
      20.0
      NaN
      22.0
      NaN
      23.0
      NaN
      26.0
      NaN
      NaN
      i earned more
      NaN
      different high school
      attended same college or university
      no
      no
      father and mother
      disapprove
      no
      yes
      no
      no
      no
      no
      no
      no
      no
      No
      no, we did not meet through the internet
      no
      no
      no
      yes
      no
      no
      no
      No
      good
      NaN
      Yes
      met offline
      4.0
      No
      Yes
      No
      No
      Yes
      No
      Yes
      No
      No
      No
      No
      No
      No
      No
      No
      No
      Yes
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      No
      2510.0
      married
      USA
      real civ union or dom partnership
      not deceased
      NaN
      NaN
      NaN
      0.0
      NaN
      No
      not met online
      same-sex couple
      NaN
      female
      9.0
      8.0
      6.0
      6-10
      8.0
      NH white
      NH white
      2.0
      meet through friends
      not met through family
      met through or as neighbors
      0.0
      jewish
      Jewish
      other christian
      Protestant or oth Christian
      married
      don't approve or don't know
      17.0
      16.0
      NaN
      raised in US
      16.0
      20.0
      good
      Yes
      Yes second background survey
      yes
      2.0
      a building with 2 or more apartments
      $100,000 to $124,999
      married
      metro
      professional or doctorate degree
      bachelor's degree or higher
      20.0
      white, non-hispanic
      west
      pacific
      rented for cash
      0.0
      0.0
      2.0
      0.0
      0.0
      working - as a paid employee
      yes
      200904.0
      value not imputed
      value not imputed
      value not imputed
      value not imputed
      not deceased
      NaN
      withdrawn kn panelist
      all other cases
      assigned to survey
      completed followup survey
      201003.0
      0.0
      married
      no
      online
      yes
      yes
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      still together
      390.0
      Yes
      2.0
      A one-family house attached to one or more houses
      $85,000 to $99,999
      Living with partner
      Metro
      Rented for cash
      West
      Pacific
      stayer
      stayer
      stayer
      0.0
      0.0
      2.0
      0.0
      0.0
      Working - as a paid employee
      Yes
      201106.0
      Professional or Doctorate degree
      Bachelor's degree or higher
      20.0
      White, Non-Hispanic
      Yes, pp3 data is newer and available
      still together, or lost to follow-up, or partn...
      still together
      Qualified for follow-up at wave3
      not deceased
      NaN
      no
      1 year ago
      2010.0
      3.0
      qualified for wave 3
      active member of KN panel
      yes
      Online
      201104.0
      788.0
      0.0
      Married
      same sex couple
      yes
      yes
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN

Select and rename columns



In [29]:

    
rename_cols_dict = {'ppage': 'age', 'ppeducat': 'education',
                   'ppethm': 'race', 'ppgender': 'sex',
                   'pphouseholdsize': 'household_size', 'pphouse': 'house_type',
                   'hhinc': 'income', 'ppmarit': 'marital_status',
                   'ppmsacat': 'in_metro', 'ppreg4': 'usa_region',
                   'pprent': 'house_payment', 'children_in_hh': 'N_child',
                   'ppwork': 'work', 'ppnet': 'has_internet',
                   'papglb_friend': 'has_gay_friendsfam', 'pppartyid3': 'politics',
                   'papreligion': 'religion', 'qflag': 'in_relationship',
                   'q9': 'partner_age', 'duration': 'N_minutes_survey',
                   'glbstatus': 'is_lgb', 's1': 'is_married',
                   'partner_race': 'partner_race', 'q7b': 'partner_religion',
                   'q10': 'partner_education', 'US_raised': 'USA_raised',
                   'q17a': 'N_marriages', 'q17b': 'N_marriages2', 'coresident': 'cohabit',
                   'q21a': 'age_first_met', 'q21b': 'age_relationship_begin',
                   'q21d': 'age_married', 'q23': 'relative_income',
                   'q25': 'same_high_school', 'q26': 'same_college',
                   'q27': 'same_hometown', 'age_difference': 'age_difference',
                   'q34':'relationship_quality',
                   'q24_met_online': 'met_online', 'met_through_friends': 'met_friends',
                   'met_through_family': 'met_family', 'met_through_as_coworkers': 'met_work'}

df = df[list(rename_cols_dict.keys())]
df.rename(columns=rename_cols_dict, inplace=True)

# Process number of marriages
df['N_marriages'] = df['N_marriages'].astype(str).replace({'nan':''}) + df['N_marriages2'].astype(str).replace({'nan':''})
df.drop('N_marriages2', axis=1, inplace=True)
df['N_marriages'] = df['N_marriages'].replace({'':np.nan, 'once (this is my first marriage)': 'once', 'refused':np.nan})
df['N_marriages'] = df['N_marriages'].astype('category')



In [30]:

    
# Clean entries to make simpler
df['in_metro'] = df['in_metro']=='metro'
df['relationship_excellent'] = df['relationship_quality'] == 'excellent'

df['house_payment'].replace({'owned or being bought by you or someone in your household': 'owned',
                         'rented for cash': 'rent',
                         'occupied without payment of cash rent': 'free'}, inplace=True)
df['race'].replace({'white, non-hispanic': 'white',
                '2+ races, non-hispanic': 'other, non-hispanic',
                'black, non-hispanic': 'black'}, inplace=True)
df['house_type'].replace({'a one-family house detached from any other house': 'house',
                'a building with 2 or more apartments': 'apartment',
                'a one-family house attached to one or more houses': 'house',
                'a mobile home': 'mobile',
                'boat, rv, van, etc.': 'mobile'}, inplace=True)
df['is_not_working'] = df['work'].str.contains('not working')
df['has_internet'] = df['has_internet'] == 'yes'
df['has_gay_friends'] = np.logical_or(df['has_gay_friendsfam']=='yes, friends', df['has_gay_friendsfam']=='yes, both')
df['has_gay_family'] = np.logical_or(df['has_gay_friendsfam']=='yes, relatives', df['has_gay_friendsfam']=='yes, both')
df['religion_is_christian'] = df['religion'].isin(['protestant (e.g., methodist, lutheran, presbyterian, episcopal)',
                                                   'catholic', 'baptist-any denomination', 'other christian', 'pentecostal', 'mormon', 'eastern orthodox'])
df['religion_is_none'] = df['religion'].isin(['none'])
df['in_relationship'] = df['in_relationship']=='partnered'
df['is_lgb'] = df['is_lgb']=='glb'
df['is_married'] = df['is_married']=='yes, i am married'
df['partner_race'].replace({'NH white': 'white', ' NH black': 'black',
                            ' NH Asian Pac Islander':'other', ' NH Other': 'other', ' NH Amer Indian': 'other'}, inplace=True)
df['partner_religion_is_christian'] = df['partner_religion'].isin(['protestant (e.g., methodist, lutheran, presbyterian, episcopal)',
                                                   'catholic', 'baptist-any denomination', 'other christian', 'pentecostal', 'mormon', 'eastern orthodox'])
df['partner_religion_is_none'] = df['partner_religion'].isin(['none'])
df['partner_education'] = df['partner_education'].map({'hs graduate or ged': 'high school',
                                                       'some college, no degree': 'some college',
                                                       "associate degree": "some college",
                                                       "bachelor's degree": "bachelor's degree or higher",
                                                       "master's degree": "bachelor's degree or higher",
                                                       "professional or doctorate degree": "bachelor's degree or higher"})
df['partner_education'].fillna('less than high school', inplace=True)
df['USA_raised'] = df['USA_raised']=='raised in US'
df['N_marriages'] = df['N_marriages'].map({'never married': '0', 'once': '1', 'twice': '2', 'three times': '3+', 'four or more times':'3+'})
df['relative_income'].replace({'i earned more': 'more', 'partner earned more': 'less',
                               'we earned about the same amount': 'same', 'refused': np.nan}, inplace=True)
df['same_high_school'] = df['same_high_school']=='same high school'
df['same_college'] = df['same_college']=='attended same college or university'
df['same_hometown'] = df['same_hometown']=='yes'
df['cohabit'] = df['cohabit']=='yes'
df['met_online'] = df['met_online']=='met online'
df['met_friends'] = df['met_friends']=='meet through friends'
df['met_family'] = df['met_family']=='met through family'
df['met_work'] = df['met_family']==1

df['age'] = df['age'].astype(int)
for c in df.columns:
    if str(type(df[c])) == 'object':
        df[c] = df[c].astype('category')



In [53]:

    
df.head()









    Out[53]:







  
    
      
      age
      education
      race
      sex
      household_size
      house_type
      income
      marital_status
      in_metro
      usa_region
      house_payment
      N_child
      work
      has_internet
      has_gay_friendsfam
      politics
      religion
      in_relationship
      partner_age
      N_minutes_survey
      is_lgb
      is_married
      partner_race
      partner_religion
      partner_education
      USA_raised
      N_marriages
      cohabit
      age_first_met
      age_relationship_begin
      age_married
      relative_income
      same_high_school
      same_college
      same_hometown
      age_difference
      relationship_quality
      met_online
      met_friends
      met_family
      met_work
      relationship_excellent
      is_not_working
      has_gay_friends
      has_gay_family
      religion_is_christian
      religion_is_none
      partner_religion_is_christian
      partner_religion_is_none
    
  
  
    
      0
      52
      bachelor's degree or higher
      hispanic
      female
      2
      apartment
      22250.0
      living with partner
      True
      midwest
      rent
      0
      working - as a paid employee
      True
      yes, friends
      democrat
      catholic
      True
      48.0
      20
      True
      False
      white
      protestant (e.g. methodist, lutheran, presbyte...
      some college
      True
      1
      False
      45.0
      45.0
      NaN
      less
      False
      False
      False
      4.0
      good
      True
      False
      False
      False
      False
      False
      True
      False
      True
      False
      False
      False
    
    
      1
      28
      bachelor's degree or higher
      white
      female
      2
      apartment
      45000.0
      living with partner
      True
      west
      rent
      0
      working - as a paid employee
      True
      yes, both
      democrat
      jewish
      True
      30.0
      13
      True
      True
      white
      none
      bachelor's degree or higher
      True
      1
      False
      19.0
      20.0
      23.0
      more
      False
      True
      False
      2.0
      good
      False
      True
      False
      False
      False
      False
      True
      True
      False
      False
      False
      True
    
    
      2
      49
      high school
      black
      female
      4
      apartment
      37250.0
      never married
      True
      south
      rent
      1
      working - as a paid employee
      True
      yes, both
      democrat
      baptist-any denomination
      False
      NaN
      0
      False
      False
      NaN
      NaN
      less than high school
      False
      NaN
      False
      NaN
      NaN
      NaN
      NaN
      False
      False
      False
      NaN
      NaN
      False
      False
      False
      False
      False
      False
      True
      True
      True
      False
      False
      False
    
    
      3
      31
      some college
      white
      male
      1
      apartment
      45000.0
      never married
      True
      south
      owned
      0
      working - as a paid employee
      True
      yes, both
      democrat
      other non-christian, please specify:
      True
      40.0
      9
      True
      False
      white
      other non-christian, please specify
      high school
      True
      0
      False
      23.0
      23.0
      NaN
      more
      False
      False
      False
      9.0
      good
      True
      True
      False
      False
      False
      False
      True
      True
      False
      False
      False
      False
    
    
      4
      35
      high school
      white
      male
      2
      house
      45000.0
      never married
      True
      south
      owned
      0
      working - self-employed
      True
      yes, friends
      democrat
      other christian
      False
      NaN
      1
      True
      False
      NaN
      NaN
      less than high school
      False
      NaN
      False
      NaN
      NaN
      NaN
      NaN
      False
      False
      False
      NaN
      NaN
      False
      False
      False
      False
      False
      False
      True
      False
      True
      False
      False
      False



In [58]:

    
df.to_csv('/gh/data/hcmst/1_cleaned.csv')

Distributions



In [54]:

    
for c in df.columns:
    print(df[c].value_counts())









    



56    107
44    104
29    103
43     96
50     96
49     94
55     92
28     91
41     90
54     90
51     86
38     86
39     86
59     84
53     83
58     82
45     78
42     78
48     77
20     76
27     75
40     74
46     73
57     71
36     71
62     68
52     67
47     66
21     65
32     64
     ... 
66     48
69     41
68     40
73     38
70     36
19     33
76     33
71     33
74     30
72     29
78     29
75     25
79     25
77     23
82     22
80     21
83     17
85     13
84     12
81     11
89      6
88      6
86      4
92      3
90      3
91      2
87      2
93      1
94      1
95      1
Name: age, Length: 77, dtype: int64
bachelor's degree or higher    1407
some college                   1210
high school                     987
less than high school           398
Name: education, dtype: int64
white                  3007
hispanic                387
black                   321
other, non-hispanic     287
Name: race, dtype: int64
female    2035
male      1967
Name: sex, dtype: int64
2     1461
1      970
3      581
4      566
5      252
6      102
7       42
8       14
9        7
10       3
11       2
15       1
12       1
Name: household_size, dtype: int64
house        3113
apartment     720
mobile        169
Name: house_type, dtype: int64
67250.0     461
55000.0     422
45000.0     397
92250.0     311
112250.0    300
80000.0     296
37250.0     260
22250.0     219
27250.0     212
32250.0     201
137250.0    162
17250.0     157
200000.0    127
11250.0     106
8750.0      100
162250.0     87
13750.0      85
6250.0       54
2500.0       45
Name: income, dtype: int64
married                1736
never married          1020
living with partner     503
divorced                477
widowed                 204
separated                62
Name: marital_status, dtype: int64
True     3436
False     566
Name: in_metro, dtype: int64
south        1262
west          975
midwest       955
northeast     810
Name: usa_region, dtype: int64
owned    3040
rent      894
free       68
Name: house_payment, dtype: int64
0    3075
1     443
2     331
3     113
4      31
5       7
7       1
6       1
Name: N_child, dtype: int64
working - as a paid employee                    2191
not working - retired                            637
not working - disabled                           347
working - self-employed                          341
not working - other                              286
not working - looking for work                   180
not working - on temporary layoff from a job      20
Name: work, dtype: int64
True     2770
False    1232
Name: has_internet, dtype: int64
yes, friends                                  1372
no                                            1171
yes, both                                     1059
yes, relatives                                 290
i would prefer to not answer this question     100
Name: has_gay_friendsfam, dtype: int64
democrat      2385
republican    1505
other          112
Name: politics, dtype: int64
protestant (e.g., methodist, lutheran, presbyterian, episcopal)    918
catholic                                                           897
none                                                               640
baptist-any denomination                                           523
other christian                                                    482
other non-christian, please specify:                               172
pentecostal                                                        110
jewish                                                             110
mormon                                                              66
buddhist                                                            33
eastern orthodox                                                    15
hindu                                                               13
muslim                                                               8
Name: religion, dtype: int64
True     3009
False     993
Name: in_relationship, dtype: int64
50.0    84
30.0    83
29.0    77
36.0    76
31.0    74
38.0    74
43.0    73
42.0    71
44.0    71
51.0    69
55.0    68
47.0    67
39.0    66
28.0    65
40.0    63
52.0    63
34.0    61
45.0    61
41.0    60
33.0    59
32.0    59
46.0    59
54.0    58
35.0    58
37.0    58
49.0    56
48.0    56
60.0    54
53.0    53
58.0    51
        ..
69.0    26
73.0    23
66.0    23
71.0    23
75.0    21
77.0    20
67.0    19
78.0    19
19.0    18
68.0    18
82.0    18
74.0    17
80.0    12
81.0    11
76.0    10
79.0     8
18.0     7
83.0     7
85.0     6
86.0     4
84.0     4
89.0     3
87.0     3
17.0     2
91.0     2
99.0     2
16.0     2
88.0     1
92.0     1
90.0     1
Name: partner_age, Length: 78, dtype: int64
1        366
0        337
8        184
9        183
10       176
11       173
12       164
2        155
7        155
13       144
14       133
6        121
17       112
15       107
16        90
21        84
18        84
5         72
19        70
20        65
23        54
3         54
24        49
4         47
25        40
27        39
29        38
22        38
26        38
28        30
        ... 
281        1
277        1
4359       1
261        1
251        1
239        1
211        1
18620      1
187        1
171        1
14504      1
167        1
157        1
147        1
143        1
137        1
131        1
129        1
121        1
113        1
109        1
107        1
101        1
99         1
95         1
89         1
87         1
38994      1
73         1
4076       1
Name: N_minutes_survey, Length: 316, dtype: int64
False    3047
True      955
Name: is_lgb, dtype: int64
False    2071
True     1931
Name: is_married, dtype: int64
white       2411
black        242
Hispanic     235
other        116
Name: partner_race, dtype: int64
catholic                                                          679
protestant (e.g. methodist, lutheran, presbyterian, episcopal)    663
none                                                              542
baptist - any denomination                                        422
other christian                                                   372
jewish                                                             83
pentecostal                                                        71
mormon                                                             65
other non-christian, please specify                                56
buddhist                                                           18
muslim                                                             13
eastern orthodox                                                   11
hindu                                                               8
refused                                                             7
Name: partner_religion, dtype: int64
less than high school          1250
bachelor's degree or higher    1047
some college                    968
high school                     737
Name: partner_education, dtype: int64
True     2858
False    1144
Name: USA_raised, dtype: int64
1     1694
0      720
2      459
3+     129
Name: N_marriages, dtype: int64
False    4002
Name: cohabit, dtype: int64
19.0    172
20.0    170
18.0    162
21.0    156
17.0    148
16.0    141
23.0    126
22.0    125
24.0     96
25.0     96
26.0     95
15.0     92
28.0     85
29.0     77
27.0     73
35.0     66
30.0     66
14.0     65
33.0     63
40.0     63
32.0     55
34.0     51
31.0     48
39.0     41
36.0     39
38.0     38
42.0     38
45.0     37
41.0     34
37.0     34
       ... 
60.0      6
57.0      6
11.0      5
8.0       5
61.0      4
62.0      4
66.0      3
73.0      3
67.0      3
63.0      3
0.0       3
69.0      2
71.0      2
65.0      2
2.0       2
68.0      2
64.0      2
7.0       2
78.0      1
74.0      1
79.0      1
85.0      1
70.0      1
77.0      1
83.0      1
81.0      1
76.0      1
3.0       1
1.0       1
75.0      1
Name: age_first_met, Length: 81, dtype: int64
20.0    198
18.0    194
21.0    191
19.0    171
22.0    138
23.0    131
17.0    128
24.0    109
25.0    104
16.0    103
26.0    100
28.0     90
27.0     80
29.0     76
30.0     73
35.0     62
32.0     59
31.0     59
33.0     59
40.0     58
34.0     52
36.0     49
15.0     47
39.0     44
42.0     42
37.0     39
38.0     38
45.0     37
41.0     37
43.0     36
       ... 
57.0     10
59.0      9
60.0      7
13.0      7
67.0      6
61.0      6
58.0      6
65.0      5
63.0      4
66.0      3
69.0      3
74.0      3
70.0      2
62.0      2
81.0      2
64.0      2
12.0      2
71.0      2
79.0      2
68.0      1
4.0       1
76.0      1
0.0       1
73.0      1
75.0      1
77.0      1
85.0      1
83.0      1
78.0      1
87.0      1
Name: age_relationship_begin, Length: 73, dtype: int64
22.0    151
21.0    134
23.0    129
25.0    125
24.0    125
20.0    108
26.0    106
19.0     81
18.0     74
27.0     72
28.0     68
30.0     63
29.0     55
31.0     44
34.0     43
32.0     43
17.0     39
33.0     37
36.0     34
35.0     32
37.0     26
38.0     25
39.0     21
40.0     19
41.0     17
43.0     17
44.0     16
48.0     16
42.0     15
50.0     15
47.0     14
45.0     13
46.0     12
16.0     12
56.0     11
55.0     11
49.0     11
52.0      9
54.0      8
53.0      8
51.0      7
57.0      7
60.0      5
58.0      4
59.0      4
63.0      3
66.0      2
64.0      2
15.0      2
62.0      2
61.0      2
72.0      2
67.0      1
73.0      1
69.0      1
14.0      1
71.0      1
78.0      1
70.0      1
65.0      1
Name: age_married, dtype: int64
more    1332
less    1272
same     375
Name: relative_income, dtype: int64
False    3587
True      415
Name: same_high_school, dtype: int64
False    3638
True      364
Name: same_college, dtype: int64
False    3330
True      672
Name: same_hometown, dtype: int64
1.0     536
2.0     412
0.0     343
3.0     342
4.0     282
5.0     187
6.0     173
7.0     126
9.0      86
8.0      82
10.0     81
11.0     65
13.0     50
12.0     45
14.0     30
16.0     25
15.0     22
17.0     18
18.0     10
20.0      9
19.0      8
26.0      8
23.0      8
24.0      6
27.0      6
22.0      5
21.0      4
30.0      3
32.0      3
31.0      2
41.0      2
25.0      2
28.0      2
35.0      2
38.0      1
29.0      1
69.0      1
37.0      1
70.0      1
45.0      1
Name: age_difference, dtype: int64
excellent    1771
good          911
fair          252
poor           42
very poor      20
refused        13
Name: relationship_quality, dtype: int64
False    3732
True      270
Name: met_online, dtype: int64
False    2982
True     1020
Name: met_friends, dtype: int64
False    3558
True      444
Name: met_family, dtype: int64
False    3558
True      444
Name: met_work, dtype: int64
False    2231
True     1771
Name: relationship_excellent, dtype: int64
False    2532
True     1470
Name: is_not_working, dtype: int64
True     2431
False    1571
Name: has_gay_friends, dtype: int64
False    2653
True     1349
Name: has_gay_family, dtype: int64
True     3011
False     991
Name: religion_is_christian, dtype: int64
False    3362
True      640
Name: religion_is_none, dtype: int64
False    2804
True     1198
Name: partner_religion_is_christian, dtype: int64
False    3460
True      542
Name: partner_religion_is_none, dtype: int64



In [55]:

    
# Countplot if categorical; distplot if numeric
from pandas.api.types import is_numeric_dtype

plt.figure(figsize=(40,40))
for i, c in enumerate(df.columns):
    plt.subplot(7,7,i+1)
    if is_numeric_dtype(df[c]):
        sns.distplot(df[c].dropna(), kde=False)
    else:
        sns.countplot(y=c, data=df)
plt.savefig('temp.png')









    



/Users/scott/anaconda/lib/python3.6/site-packages/seaborn/categorical.py:1428: FutureWarning: remove_na is deprecated and is a private function. Do not use.
  stat_data = remove_na(group_data)



In [57]:

    
sns.barplot(x='income', y='race', data=df)









    



/Users/scott/anaconda/lib/python3.6/site-packages/seaborn/categorical.py:1428: FutureWarning: remove_na is deprecated and is a private function. Do not use.
  stat_data = remove_na(group_data)






    Out[57]:





<matplotlib.axes._subplots.AxesSubplot at 0x120e97278>

	caseid_new	weight1	weight2	ppage	ppagecat	ppagect4	ppeduc	ppeducat	ppethm	ppgender	pphhhead	pphouseholdsize	pphouse	ppincimp	hhinc	ppmarit	ppmsacat	ppreg4	ppreg9	pprent	ppt01	ppt1317	ppt18ov	ppt25	ppt612	children_in_hh	ppwork	ppnet	ppq14arace	pphispan	pprace_white	pprace_black	pprace_nativeamerican	pprace_asianindian	pprace_chinese	pprace_filipino	pprace_japanese	pprace_korean	pprace_vietnamese	pprace_otherasian	pprace_hawaiian	pprace_guamanian	pprace_samoan	pprace_otherpacificislander	pprace_someotherrace	papglb_friend	pppartyid3	papevangelical	papreligion	ppppcmdate_yrmo	pppadate_yrmo	pphhcomp11_member2_age	pphhcomp11_member3_age	pphhcomp11_member4_age	pphhcomp11_member5_age	pphhcomp11_member6_age	pphhcomp11_member7_age	pphhcomp11_member8_age	pphhcomp11_member9_age	pphhcomp11_member10_age	pphhcomp11_member11_age	pphhcomp11_member12_age	pphhcomp11_member13_age	pphhcomp11_member14_age	pphhcomp11_member15_age	pphhcomp11_member2_gender	pphhcomp11_member3_gender	pphhcomp11_member4_gender	pphhcomp11_member5_gender	pphhcomp11_member6_gender	pphhcomp11_member7_gender	pphhcomp11_member8_gender	pphhcomp11_member9_gender	pphhcomp11_member10_gender	pphhcomp11_member11_gender	pphhcomp11_member12_gender	pphhcomp11_member13_gender	pphhcomp11_member14_gender	pphhcomp11_member15_gender	pphhcomp11_member2_relationship	pphhcomp11_member3_relationship	pphhcomp11_member4_relationship	pphhcomp11_member5_relationship	pphhcomp11_member6_relationship	pphhcomp11_member7_relationship	pphhcomp11_member8_relationship	pphhcomp11_member9_relationship	pphhcomp11_member10_relationship	pphhcomp11_member11_relationship	pphhcomp11_member12_relationship	pphhcomp11_member13_relationship	pphhcomp11_member14_relationship	pphhcomp11_member15_relationship	irb_consent	weight3	weight4	weight5	weight6	weight7	weight_couples_coresident	HCMST_main_interview_yrmo	duration	qflag	glbstatus	papglb_status	recsource	s1	s1a	s2	q3_codes	q4	q5	q6a	q6b	q7a	q7b	q8a	q8b	q9	q10	q11	q12	q13a	q13b	q14	q15a1_compressed	q16	q17a	q17b	q17c	q17d	gender_attraction	q18a_1	q18a_2	q18a_3	q18a_refused	q18b_codes	q18c_codes	q19	q20	q21a	q21a_refusal	q21b	q21b_refusal	q21c	q21c_refusal	q21d	q21d_refusal	q21e	q21e_refusal	q22	q23	q24_codes	q25	q26	q27	q28	q29	q30	q31_1	q31_2	q31_3	q31_4	q31_5	q31_6	q31_7	q31_8	q31_9	q31_other_text_entered	q32	q33_1	q33_2	q33_3	q33_4	q33_5	q33_6	q33_7	q33_other_text_entered	q34	q35_codes	q35_text_entered	q24_met_online	summary_q24_total	q24_R_cowork	q24_R_friend	q24_R_family	q24_R_sig_other	q24_R_neighbor	q24_P_cowork	q24_P_friend	q24_P_family	q24_P_sig_other	q24_P_neighbor	q24_btwn_I_cowork	q24_btwn_I_friend	q24_btwn_I_family	q24_btwn_I_sig_other	q24_btwn_I_neighbor	q24_school	q24_college	q24_military	q24_church	q24_vol_org	q24_customer	q24_bar_restaurant	q24_internet_dating	q24_internet_social_networking	q24_internet_game	q24_internet_chat	q24_internet_community	q24_internet_other	q24_public	q24_private_party	q24_blind_date	q24_vacation	q24_singles_service_non_internet	q24_business_trip	q24_work_neighbor	q24_fam_sister_active	q24_fam_brother_active	q24_fam_mother_active	q24_fam_father_active	q24_fam_other_active	q24_fam_cousins_active	q24_fam_aunt_niece_active	q24_fam_uncle_nephew_active	q24_fam_grandmother_active	q24_fam_grandfather_active	q24_fam_sister_passive	q24_fam_brother_passive	q24_fam_mother_passive	q24_fam_father_passive	q24_fam_other_passive	q24_fam_cousins_passive	q24_fam_aunt_niece_passive	q24_fam_uncle_nephew_passive	q24_fam_grandmother_passive	q24_fam_grandfather_passive	q24_fam_female	q24_fam_male	distancemoved_10mi	marrynotreally	marrycountry	civilnotreally	partner_deceased	partner_religion_reclassified	partner_religion_child_reclass	own_religion_child_reclass	q32_internet	how_met_online	either_internet	either_internet_adjusted	same_sex_couple	potential_partner_gender_recodes	alt_partner_gender	how_long_ago_first_met	how_long_ago_first_romantic	how_long_ago_first_cohab	how_long_ago_first_met_cat	how_long_relationship	respondent_race	partner_race	age_difference	met_through_friends	met_through_family	met_through_as_neighbors	met_through_as_coworkers	respondent_religion_at_16	respondent_relig_16_cat	partner_religion_at_16	partner_relig_16_cat	married	parental_approval	respondent_yrsed	partner_yrsed	home_country_recode	US_raised	partner_mom_yrsed	respondent_mom_yrsed	relationship_quality	coresident	pp2_afterp1	pp2_pphhhead	pp2_pphhsize	pp2_pphouse	pp2_ppincimp	pp2_ppmarit	pp2_ppmsacat	pp2_ppeduc	pp2_ppeducat	pp2_respondent_yrsed	pp2_ppethm	pp2_ppreg4	pp2_ppreg9	pp2_pprent	pp2_ppt01	pp2_ppt1317	pp2_ppt18ov	pp2_ppt25	pp2_ppt612	pp2_ppwork	pp2_ppnet	pp2_ppcmdate_yrmo	pp_igdr1	pp_ieduc1	pp2_igdr2	pp2_ieduc2	w2_deceased	w2_multiname	w2_panelstat	w2_donotcontact	w2_assigned	w2_f1complete	w2_HCMST_interview_fin_yrmo	w2_duration	w2_xmarry	w2_xss	w2_source	w2_q1	w2_q2	w2_q3	w2_q4	w2_q5	w2_q6	w2_q7	w2_q8	w2_q9	w2_q10	w2_broke_up	w2_days_elapsed	pp3_pphhhead	pp3_pphhsize	pp3_pphouse	pp3_ppincimp	pp3_ppmarit	pp3_ppmsacat	pp3_pprent	pp3_ppreg4	pp3_ppreg9	interstate_mover_pp1_pp2	interstate_mover_pp2_pp3	interstate_mover_pp1_pp3	pp3_ppt01	pp3_ppt1317	pp3_ppt18ov	pp3_ppt25	pp3_ppt612	pp3_ppwork	pp3_ppnet	pp3_ppcmdate_yrmo	pp3_ppeduc	pp3_ppeducat	pp3_respondent_yrsed	pp3_ppethm	pp3_newer	w2w3_combo_breakup	w3_broke_up	w3_xpartnered	w3_xdeceased	w3_multiname	w3_xss	w3_xlast	w3_xyear	w3_xmonth	w3_xqualified	w3_status	w3_complete	w3_source	w3_HCMST_interview_fin_yrmo	w3_days_elapsed	w3_duration	w3_xmarry	w3_xtype	w3_q1	w3_q2	w3_q3	w3_q4	w3_mbtiming_year	w3_mbtiming_month	w3_q5	w3_q6	w3_q7	w3_q8	w3_q9	w3_q10	w3_nonmbtiming_year	w3_nonmbtiming_month
0	22526	4265	4265.0	52	45-54	45-59	bachelors degree	bachelor's degree or higher	hispanic	female	yes	2	a building with 2 or more apartments	$20,000 to $24,999	22250.0	living with partner	metro	midwest	east-north central	rented for cash	0	0	2	0	0	0	working - as a paid employee	yes	not asked	yes, other spanish/hispanic/latino	yes	no	no	no	no	no	no	no	no	no	no	no	no	no	no	yes, friends	democrat	yes	catholic	200711.0	200709.0	47.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	female	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	unmarried partner	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	yes, i agree to participate	4265.0	5618	7505.0	3576.0	3576.0	634.354187	200902.0	20	partnered	glb	yes	glb augment sample	no, i am not married	NaN	yes, i have a sexual partner (boyfriend or gir...	NaN	female	yes, we are a same-sex couple	no (not latino or hispanic)	white	no	protestant (e.g. methodist, lutheran, presbyte...	yes, the same	NaN	48.0	associate degree	hs graduate or ged	democrat	no, i have changed religions	protestant (e.g. methodist, lutheran, presbyte...	bachelor's degree	United States	0.0	NaN	once	i am mostly sexually attracted to women, less ...	NaN	same gender mostly	no	no	have neither DP nor CU	no	NaN	NaN	yes	NaN	45.0	NaN	45.0	NaN	45.0	NaN	NaN	NaN	NaN	NaN	NaN	partner earned more	NaN	different high school	did not attend same college or university	no	no	neither father nor mother are alive	NaN	no	no	no	yes	no	no	no	no	no	No	yes, a different kind of internet service	refused	refused	refused	refused	refused	refused	refused	No	good	NaN	Yes	met online	2.0	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	Yes	No	No	No	No	No	Yes	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	10.0	NaN		NaN	not deceased	NaN	NaN	NaN	1.0	Previously Strangers: Before online connection...	Yes	met online	same-sex couple	NaN	female	7.0	7.0	7.0	6-10	7.0	Hispanic	NH white	4.0	not met through friends	not met through family	did not meet through or as neighbors	0.0	protestant (e.g. methodist, lutheran, presbyte...	Protestant or oth Christian	protestant (e.g. methodist, lutheran, presbyte...	Protestant or oth Christian	not married	NaN	16.0	14.0	NaN	raised in US	12.0	16.0	good	Yes	Yes second background survey	yes	2.0	a building with 2 or more apartments	$20,000 to $24,999	living with partner	metro	bachelors degree	bachelor's degree or higher	16.0	hispanic	midwest	east-north central	rented for cash	0.0	0.0	2.0	0.0	0.0	not working - looking for work	yes	200905.0	value not imputed	value not imputed	value not imputed	value not imputed	not deceased	NaN	withdrawn kn panelist	all other cases	assigned to survey	completed followup survey	201003.0	6.0	partnered	yes, qualified to ask about new domestic parte...	online	NaN	NaN	NaN	NaN	yes	yes	no, did not marry [partner]	no, we have not gotten a domestic partnership ...	NaN	NaN	still together	390.0	Yes	2.0	A building with 2 or more apartments	$15,000 to $19,999	Living with partner	Metro	Rented for cash	Midwest	East-North Central	stayer	stayer	stayer	0.0	0.0	2.0	0.0	0.0	Not working - looking for work	Yes	201107.0	Bachelors degree	Bachelor's degree or higher	16.0	Hispanic	Yes, pp3 data is newer and available	still together, or lost to follow-up, or partn...	still together	Qualified for follow-up at wave3	not deceased	NaN	yes	1 year ago	2010.0	3.0	qualified for wave 3	active member of KN panel	yes	Online	201104.0	774.0	2.0	Partnered	same sex couple	NaN	NaN	NaN	NaN	NaN	NaN	yes	yes	no, did not marry [xNameP]	No, we have not gotten a domestic partnership ...	NaN	NaN	NaN	NaN
1	23286	16485	16485.0	28	25-34	18-29	masters degree	bachelor's degree or higher	white, non-hispanic	female	yes	2	a building with 2 or more apartments	$40,000 to $49,999	45000.0	living with partner	metro	west	pacific	rented for cash	0	0	2	0	0	0	working - as a paid employee	yes	not asked	no, i am not	yes	no	no	no	no	no	no	no	no	no	no	no	no	no	no	yes, both	democrat	no	jewish	200711.0	200709.0	29.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	female	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	spouse	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	yes, i agree to participate	16485.0	8383	11198.0	10241.0	10241.0	9589.906250	200902.0	13	partnered	glb	yes	glb augment sample	yes, i am married	NaN	NaN	NaN	female	yes, we are a same-sex couple	no (not latino or hispanic)	white	no	none	no, has changed religions	other christian	30.0	bachelor's degree	bachelor's degree	democrat	no, i have changed religions	jewish	professional or doctorate degree	United States	0.0	once (this is my first marriage)	NaN	i am equally sexually attracted to men and women	NaN	both genders equally	yes	no	have either DP or CU	no	NaN	NaN	yes	NaN	19.0	NaN	20.0	NaN	22.0	NaN	23.0	NaN	26.0	NaN	NaN	i earned more	NaN	different high school	attended same college or university	no	no	father and mother	disapprove	no	yes	no	no	no	no	no	no	no	No	no, we did not meet through the internet	no	no	no	yes	no	no	no	No	good	NaN	Yes	met offline	4.0	No	Yes	No	No	Yes	No	Yes	No	No	No	No	No	No	No	No	No	Yes	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	No	2510.0	married	USA	real civ union or dom partnership	not deceased	NaN	NaN	NaN	0.0	NaN	No	not met online	same-sex couple	NaN	female	9.0	8.0	6.0	6-10	8.0	NH white	NH white	2.0	meet through friends	not met through family	met through or as neighbors	0.0	jewish	Jewish	other christian	Protestant or oth Christian	married	don't approve or don't know	17.0	16.0	NaN	raised in US	16.0	20.0	good	Yes	Yes second background survey	yes	2.0	a building with 2 or more apartments	$100,000 to $124,999	married	metro	professional or doctorate degree	bachelor's degree or higher	20.0	white, non-hispanic	west	pacific	rented for cash	0.0	0.0	2.0	0.0	0.0	working - as a paid employee	yes	200904.0	value not imputed	value not imputed	value not imputed	value not imputed	not deceased	NaN	withdrawn kn panelist	all other cases	assigned to survey	completed followup survey	201003.0	0.0	married	no	online	yes	yes	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	still together	390.0	Yes	2.0	A one-family house attached to one or more houses	$85,000 to $99,999	Living with partner	Metro	Rented for cash	West	Pacific	stayer	stayer	stayer	0.0	0.0	2.0	0.0	0.0	Working - as a paid employee	Yes	201106.0	Professional or Doctorate degree	Bachelor's degree or higher	20.0	White, Non-Hispanic	Yes, pp3 data is newer and available	still together, or lost to follow-up, or partn...	still together	Qualified for follow-up at wave3	not deceased	NaN	no	1 year ago	2010.0	3.0	qualified for wave 3	active member of KN panel	yes	Online	201104.0	788.0	0.0	Married	same sex couple	yes	yes	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN

Data info

Data notes

Load raw data

Select and rename columns

Distributions