Data notes

Wave I, the main survey, was fielded between February 21 and April 2, 2009. Wave 2 was fielded March 12, 2010 to June 8, 2010. Wave 3 was fielded March 22, 2011 to August 29, 2011. Wave 4 was fielded between March and November of 2013. Wave 5 was fielded between November, 2014 and March, 2015.


In [1]:
import numpy as np
import pandas as pd

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

pd.options.display.max_columns=1000

Load raw data


In [28]:
df = pd.read_stata('/gh/data/hcmst/1.dta')
# df2 = pd.read_stata('/gh/data/hcmst/2.dta')
# df3 = pd.read_stata('/gh/data/hcmst/3.dta')
# df = df1.merge(df2, on='caseid_new')
# df = df.merge(df3, on='caseid_new')
df.head(2)


Out[28]:
caseid_new weight1 weight2 ppage ppagecat ppagect4 ppeduc ppeducat ppethm ppgender pphhhead pphouseholdsize pphouse ppincimp hhinc ppmarit ppmsacat ppreg4 ppreg9 pprent ppt01 ppt1317 ppt18ov ppt25 ppt612 children_in_hh ppwork ppnet ppq14arace pphispan pprace_white pprace_black pprace_nativeamerican pprace_asianindian pprace_chinese pprace_filipino pprace_japanese pprace_korean pprace_vietnamese pprace_otherasian pprace_hawaiian pprace_guamanian pprace_samoan pprace_otherpacificislander pprace_someotherrace papglb_friend pppartyid3 papevangelical papreligion ppppcmdate_yrmo pppadate_yrmo pphhcomp11_member2_age pphhcomp11_member3_age pphhcomp11_member4_age pphhcomp11_member5_age pphhcomp11_member6_age pphhcomp11_member7_age pphhcomp11_member8_age pphhcomp11_member9_age pphhcomp11_member10_age pphhcomp11_member11_age pphhcomp11_member12_age pphhcomp11_member13_age pphhcomp11_member14_age pphhcomp11_member15_age pphhcomp11_member2_gender pphhcomp11_member3_gender pphhcomp11_member4_gender pphhcomp11_member5_gender pphhcomp11_member6_gender pphhcomp11_member7_gender pphhcomp11_member8_gender pphhcomp11_member9_gender pphhcomp11_member10_gender pphhcomp11_member11_gender pphhcomp11_member12_gender pphhcomp11_member13_gender pphhcomp11_member14_gender pphhcomp11_member15_gender pphhcomp11_member2_relationship pphhcomp11_member3_relationship pphhcomp11_member4_relationship pphhcomp11_member5_relationship pphhcomp11_member6_relationship pphhcomp11_member7_relationship pphhcomp11_member8_relationship pphhcomp11_member9_relationship pphhcomp11_member10_relationship pphhcomp11_member11_relationship pphhcomp11_member12_relationship pphhcomp11_member13_relationship pphhcomp11_member14_relationship pphhcomp11_member15_relationship irb_consent weight3 weight4 weight5 weight6 weight7 weight_couples_coresident HCMST_main_interview_yrmo duration qflag glbstatus papglb_status recsource s1 s1a s2 q3_codes q4 q5 q6a q6b q7a q7b q8a q8b q9 q10 q11 q12 q13a q13b q14 q15a1_compressed q16 q17a q17b q17c q17d gender_attraction q18a_1 q18a_2 q18a_3 q18a_refused q18b_codes q18c_codes q19 q20 q21a q21a_refusal q21b q21b_refusal q21c q21c_refusal q21d q21d_refusal q21e q21e_refusal q22 q23 q24_codes q25 q26 q27 q28 q29 q30 q31_1 q31_2 q31_3 q31_4 q31_5 q31_6 q31_7 q31_8 q31_9 q31_other_text_entered q32 q33_1 q33_2 q33_3 q33_4 q33_5 q33_6 q33_7 q33_other_text_entered q34 q35_codes q35_text_entered q24_met_online summary_q24_total q24_R_cowork q24_R_friend q24_R_family q24_R_sig_other q24_R_neighbor q24_P_cowork q24_P_friend q24_P_family q24_P_sig_other q24_P_neighbor q24_btwn_I_cowork q24_btwn_I_friend q24_btwn_I_family q24_btwn_I_sig_other q24_btwn_I_neighbor q24_school q24_college q24_military q24_church q24_vol_org q24_customer q24_bar_restaurant q24_internet_dating q24_internet_social_networking q24_internet_game q24_internet_chat q24_internet_community q24_internet_other q24_public q24_private_party q24_blind_date q24_vacation q24_singles_service_non_internet q24_business_trip q24_work_neighbor q24_fam_sister_active q24_fam_brother_active q24_fam_mother_active q24_fam_father_active q24_fam_other_active q24_fam_cousins_active q24_fam_aunt_niece_active q24_fam_uncle_nephew_active q24_fam_grandmother_active q24_fam_grandfather_active q24_fam_sister_passive q24_fam_brother_passive q24_fam_mother_passive q24_fam_father_passive q24_fam_other_passive q24_fam_cousins_passive q24_fam_aunt_niece_passive q24_fam_uncle_nephew_passive q24_fam_grandmother_passive q24_fam_grandfather_passive q24_fam_female q24_fam_male distancemoved_10mi marrynotreally marrycountry civilnotreally partner_deceased partner_religion_reclassified partner_religion_child_reclass own_religion_child_reclass q32_internet how_met_online either_internet either_internet_adjusted same_sex_couple potential_partner_gender_recodes alt_partner_gender how_long_ago_first_met how_long_ago_first_romantic how_long_ago_first_cohab how_long_ago_first_met_cat how_long_relationship respondent_race partner_race age_difference met_through_friends met_through_family met_through_as_neighbors met_through_as_coworkers respondent_religion_at_16 respondent_relig_16_cat partner_religion_at_16 partner_relig_16_cat married parental_approval respondent_yrsed partner_yrsed home_country_recode US_raised partner_mom_yrsed respondent_mom_yrsed relationship_quality coresident pp2_afterp1 pp2_pphhhead pp2_pphhsize pp2_pphouse pp2_ppincimp pp2_ppmarit pp2_ppmsacat pp2_ppeduc pp2_ppeducat pp2_respondent_yrsed pp2_ppethm pp2_ppreg4 pp2_ppreg9 pp2_pprent pp2_ppt01 pp2_ppt1317 pp2_ppt18ov pp2_ppt25 pp2_ppt612 pp2_ppwork pp2_ppnet pp2_ppcmdate_yrmo pp_igdr1 pp_ieduc1 pp2_igdr2 pp2_ieduc2 w2_deceased w2_multiname w2_panelstat w2_donotcontact w2_assigned w2_f1complete w2_HCMST_interview_fin_yrmo w2_duration w2_xmarry w2_xss w2_source w2_q1 w2_q2 w2_q3 w2_q4 w2_q5 w2_q6 w2_q7 w2_q8 w2_q9 w2_q10 w2_broke_up w2_days_elapsed pp3_pphhhead pp3_pphhsize pp3_pphouse pp3_ppincimp pp3_ppmarit pp3_ppmsacat pp3_pprent pp3_ppreg4 pp3_ppreg9 interstate_mover_pp1_pp2 interstate_mover_pp2_pp3 interstate_mover_pp1_pp3 pp3_ppt01 pp3_ppt1317 pp3_ppt18ov pp3_ppt25 pp3_ppt612 pp3_ppwork pp3_ppnet pp3_ppcmdate_yrmo pp3_ppeduc pp3_ppeducat pp3_respondent_yrsed pp3_ppethm pp3_newer w2w3_combo_breakup w3_broke_up w3_xpartnered w3_xdeceased w3_multiname w3_xss w3_xlast w3_xyear w3_xmonth w3_xqualified w3_status w3_complete w3_source w3_HCMST_interview_fin_yrmo w3_days_elapsed w3_duration w3_xmarry w3_xtype w3_q1 w3_q2 w3_q3 w3_q4 w3_mbtiming_year w3_mbtiming_month w3_q5 w3_q6 w3_q7 w3_q8 w3_q9 w3_q10 w3_nonmbtiming_year w3_nonmbtiming_month
0 22526 4265 4265.0 52 45-54 45-59 bachelors degree bachelor's degree or higher hispanic female yes 2 a building with 2 or more apartments $20,000 to $24,999 22250.0 living with partner metro midwest east-north central rented for cash 0 0 2 0 0 0 working - as a paid employee yes not asked yes, other spanish/hispanic/latino yes no no no no no no no no no no no no no no yes, friends democrat yes catholic 200711.0 200709.0 47.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN female NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN unmarried partner NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN yes, i agree to participate 4265.0 5618 7505.0 3576.0 3576.0 634.354187 200902.0 20 partnered glb yes glb augment sample no, i am not married NaN yes, i have a sexual partner (boyfriend or gir... NaN female yes, we are a same-sex couple no (not latino or hispanic) white no protestant (e.g. methodist, lutheran, presbyte... yes, the same NaN 48.0 associate degree hs graduate or ged democrat no, i have changed religions protestant (e.g. methodist, lutheran, presbyte... bachelor's degree United States 0.0 NaN once i am mostly sexually attracted to women, less ... NaN same gender mostly no no have neither DP nor CU no NaN NaN yes NaN 45.0 NaN 45.0 NaN 45.0 NaN NaN NaN NaN NaN NaN partner earned more NaN different high school did not attend same college or university no no neither father nor mother are alive NaN no no no yes no no no no no No yes, a different kind of internet service refused refused refused refused refused refused refused No good NaN Yes met online 2.0 No No No No No No No No No No No No No No No No No No No No No No Yes No No No No No Yes No No No No No No No No No No No No No No No No No No No No No No No No No No No No 10.0 NaN NaN not deceased NaN NaN NaN 1.0 Previously Strangers: Before online connection... Yes met online same-sex couple NaN female 7.0 7.0 7.0 6-10 7.0 Hispanic NH white 4.0 not met through friends not met through family did not meet through or as neighbors 0.0 protestant (e.g. methodist, lutheran, presbyte... Protestant or oth Christian protestant (e.g. methodist, lutheran, presbyte... Protestant or oth Christian not married NaN 16.0 14.0 NaN raised in US 12.0 16.0 good Yes Yes second background survey yes 2.0 a building with 2 or more apartments $20,000 to $24,999 living with partner metro bachelors degree bachelor's degree or higher 16.0 hispanic midwest east-north central rented for cash 0.0 0.0 2.0 0.0 0.0 not working - looking for work yes 200905.0 value not imputed value not imputed value not imputed value not imputed not deceased NaN withdrawn kn panelist all other cases assigned to survey completed followup survey 201003.0 6.0 partnered yes, qualified to ask about new domestic parte... online NaN NaN NaN NaN yes yes no, did not marry [partner] no, we have not gotten a domestic partnership ... NaN NaN still together 390.0 Yes 2.0 A building with 2 or more apartments $15,000 to $19,999 Living with partner Metro Rented for cash Midwest East-North Central stayer stayer stayer 0.0 0.0 2.0 0.0 0.0 Not working - looking for work Yes 201107.0 Bachelors degree Bachelor's degree or higher 16.0 Hispanic Yes, pp3 data is newer and available still together, or lost to follow-up, or partn... still together Qualified for follow-up at wave3 not deceased NaN yes 1 year ago 2010.0 3.0 qualified for wave 3 active member of KN panel yes Online 201104.0 774.0 2.0 Partnered same sex couple NaN NaN NaN NaN NaN NaN yes yes no, did not marry [xNameP] No, we have not gotten a domestic partnership ... NaN NaN NaN NaN
1 23286 16485 16485.0 28 25-34 18-29 masters degree bachelor's degree or higher white, non-hispanic female yes 2 a building with 2 or more apartments $40,000 to $49,999 45000.0 living with partner metro west pacific rented for cash 0 0 2 0 0 0 working - as a paid employee yes not asked no, i am not yes no no no no no no no no no no no no no no yes, both democrat no jewish 200711.0 200709.0 29.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN female NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN spouse NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN yes, i agree to participate 16485.0 8383 11198.0 10241.0 10241.0 9589.906250 200902.0 13 partnered glb yes glb augment sample yes, i am married NaN NaN NaN female yes, we are a same-sex couple no (not latino or hispanic) white no none no, has changed religions other christian 30.0 bachelor's degree bachelor's degree democrat no, i have changed religions jewish professional or doctorate degree United States 0.0 once (this is my first marriage) NaN i am equally sexually attracted to men and women NaN both genders equally yes no have either DP or CU no NaN NaN yes NaN 19.0 NaN 20.0 NaN 22.0 NaN 23.0 NaN 26.0 NaN NaN i earned more NaN different high school attended same college or university no no father and mother disapprove no yes no no no no no no no No no, we did not meet through the internet no no no yes no no no No good NaN Yes met offline 4.0 No Yes No No Yes No Yes No No No No No No No No No Yes No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No 2510.0 married USA real civ union or dom partnership not deceased NaN NaN NaN 0.0 NaN No not met online same-sex couple NaN female 9.0 8.0 6.0 6-10 8.0 NH white NH white 2.0 meet through friends not met through family met through or as neighbors 0.0 jewish Jewish other christian Protestant or oth Christian married don't approve or don't know 17.0 16.0 NaN raised in US 16.0 20.0 good Yes Yes second background survey yes 2.0 a building with 2 or more apartments $100,000 to $124,999 married metro professional or doctorate degree bachelor's degree or higher 20.0 white, non-hispanic west pacific rented for cash 0.0 0.0 2.0 0.0 0.0 working - as a paid employee yes 200904.0 value not imputed value not imputed value not imputed value not imputed not deceased NaN withdrawn kn panelist all other cases assigned to survey completed followup survey 201003.0 0.0 married no online yes yes NaN NaN NaN NaN NaN NaN NaN NaN still together 390.0 Yes 2.0 A one-family house attached to one or more houses $85,000 to $99,999 Living with partner Metro Rented for cash West Pacific stayer stayer stayer 0.0 0.0 2.0 0.0 0.0 Working - as a paid employee Yes 201106.0 Professional or Doctorate degree Bachelor's degree or higher 20.0 White, Non-Hispanic Yes, pp3 data is newer and available still together, or lost to follow-up, or partn... still together Qualified for follow-up at wave3 not deceased NaN no 1 year ago 2010.0 3.0 qualified for wave 3 active member of KN panel yes Online 201104.0 788.0 0.0 Married same sex couple yes yes NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

Select and rename columns


In [29]:
rename_cols_dict = {'ppage': 'age', 'ppeducat': 'education',
                   'ppethm': 'race', 'ppgender': 'sex',
                   'pphouseholdsize': 'household_size', 'pphouse': 'house_type',
                   'hhinc': 'income', 'ppmarit': 'marital_status',
                   'ppmsacat': 'in_metro', 'ppreg4': 'usa_region',
                   'pprent': 'house_payment', 'children_in_hh': 'N_child',
                   'ppwork': 'work', 'ppnet': 'has_internet',
                   'papglb_friend': 'has_gay_friendsfam', 'pppartyid3': 'politics',
                   'papreligion': 'religion', 'qflag': 'in_relationship',
                   'q9': 'partner_age', 'duration': 'N_minutes_survey',
                   'glbstatus': 'is_lgb', 's1': 'is_married',
                   'partner_race': 'partner_race', 'q7b': 'partner_religion',
                   'q10': 'partner_education', 'US_raised': 'USA_raised',
                   'q17a': 'N_marriages', 'q17b': 'N_marriages2', 'coresident': 'cohabit',
                   'q21a': 'age_first_met', 'q21b': 'age_relationship_begin',
                   'q21d': 'age_married', 'q23': 'relative_income',
                   'q25': 'same_high_school', 'q26': 'same_college',
                   'q27': 'same_hometown', 'age_difference': 'age_difference',
                   'q34':'relationship_quality',
                   'q24_met_online': 'met_online', 'met_through_friends': 'met_friends',
                   'met_through_family': 'met_family', 'met_through_as_coworkers': 'met_work'}

df = df[list(rename_cols_dict.keys())]
df.rename(columns=rename_cols_dict, inplace=True)

# Process number of marriages
df['N_marriages'] = df['N_marriages'].astype(str).replace({'nan':''}) + df['N_marriages2'].astype(str).replace({'nan':''})
df.drop('N_marriages2', axis=1, inplace=True)
df['N_marriages'] = df['N_marriages'].replace({'':np.nan, 'once (this is my first marriage)': 'once', 'refused':np.nan})
df['N_marriages'] = df['N_marriages'].astype('category')

In [30]:
# Clean entries to make simpler
df['in_metro'] = df['in_metro']=='metro'
df['relationship_excellent'] = df['relationship_quality'] == 'excellent'

df['house_payment'].replace({'owned or being bought by you or someone in your household': 'owned',
                         'rented for cash': 'rent',
                         'occupied without payment of cash rent': 'free'}, inplace=True)
df['race'].replace({'white, non-hispanic': 'white',
                '2+ races, non-hispanic': 'other, non-hispanic',
                'black, non-hispanic': 'black'}, inplace=True)
df['house_type'].replace({'a one-family house detached from any other house': 'house',
                'a building with 2 or more apartments': 'apartment',
                'a one-family house attached to one or more houses': 'house',
                'a mobile home': 'mobile',
                'boat, rv, van, etc.': 'mobile'}, inplace=True)
df['is_not_working'] = df['work'].str.contains('not working')
df['has_internet'] = df['has_internet'] == 'yes'
df['has_gay_friends'] = np.logical_or(df['has_gay_friendsfam']=='yes, friends', df['has_gay_friendsfam']=='yes, both')
df['has_gay_family'] = np.logical_or(df['has_gay_friendsfam']=='yes, relatives', df['has_gay_friendsfam']=='yes, both')
df['religion_is_christian'] = df['religion'].isin(['protestant (e.g., methodist, lutheran, presbyterian, episcopal)',
                                                   'catholic', 'baptist-any denomination', 'other christian', 'pentecostal', 'mormon', 'eastern orthodox'])
df['religion_is_none'] = df['religion'].isin(['none'])
df['in_relationship'] = df['in_relationship']=='partnered'
df['is_lgb'] = df['is_lgb']=='glb'
df['is_married'] = df['is_married']=='yes, i am married'
df['partner_race'].replace({'NH white': 'white', ' NH black': 'black',
                            ' NH Asian Pac Islander':'other', ' NH Other': 'other', ' NH Amer Indian': 'other'}, inplace=True)
df['partner_religion_is_christian'] = df['partner_religion'].isin(['protestant (e.g., methodist, lutheran, presbyterian, episcopal)',
                                                   'catholic', 'baptist-any denomination', 'other christian', 'pentecostal', 'mormon', 'eastern orthodox'])
df['partner_religion_is_none'] = df['partner_religion'].isin(['none'])
df['partner_education'] = df['partner_education'].map({'hs graduate or ged': 'high school',
                                                       'some college, no degree': 'some college',
                                                       "associate degree": "some college",
                                                       "bachelor's degree": "bachelor's degree or higher",
                                                       "master's degree": "bachelor's degree or higher",
                                                       "professional or doctorate degree": "bachelor's degree or higher"})
df['partner_education'].fillna('less than high school', inplace=True)
df['USA_raised'] = df['USA_raised']=='raised in US'
df['N_marriages'] = df['N_marriages'].map({'never married': '0', 'once': '1', 'twice': '2', 'three times': '3+', 'four or more times':'3+'})
df['relative_income'].replace({'i earned more': 'more', 'partner earned more': 'less',
                               'we earned about the same amount': 'same', 'refused': np.nan}, inplace=True)
df['same_high_school'] = df['same_high_school']=='same high school'
df['same_college'] = df['same_college']=='attended same college or university'
df['same_hometown'] = df['same_hometown']=='yes'
df['cohabit'] = df['cohabit']=='yes'
df['met_online'] = df['met_online']=='met online'
df['met_friends'] = df['met_friends']=='meet through friends'
df['met_family'] = df['met_family']=='met through family'
df['met_work'] = df['met_family']==1

df['age'] = df['age'].astype(int)
for c in df.columns:
    if str(type(df[c])) == 'object':
        df[c] = df[c].astype('category')

In [53]:
df.head()


Out[53]:
age education race sex household_size house_type income marital_status in_metro usa_region house_payment N_child work has_internet has_gay_friendsfam politics religion in_relationship partner_age N_minutes_survey is_lgb is_married partner_race partner_religion partner_education USA_raised N_marriages cohabit age_first_met age_relationship_begin age_married relative_income same_high_school same_college same_hometown age_difference relationship_quality met_online met_friends met_family met_work relationship_excellent is_not_working has_gay_friends has_gay_family religion_is_christian religion_is_none partner_religion_is_christian partner_religion_is_none
0 52 bachelor's degree or higher hispanic female 2 apartment 22250.0 living with partner True midwest rent 0 working - as a paid employee True yes, friends democrat catholic True 48.0 20 True False white protestant (e.g. methodist, lutheran, presbyte... some college True 1 False 45.0 45.0 NaN less False False False 4.0 good True False False False False False True False True False False False
1 28 bachelor's degree or higher white female 2 apartment 45000.0 living with partner True west rent 0 working - as a paid employee True yes, both democrat jewish True 30.0 13 True True white none bachelor's degree or higher True 1 False 19.0 20.0 23.0 more False True False 2.0 good False True False False False False True True False False False True
2 49 high school black female 4 apartment 37250.0 never married True south rent 1 working - as a paid employee True yes, both democrat baptist-any denomination False NaN 0 False False NaN NaN less than high school False NaN False NaN NaN NaN NaN False False False NaN NaN False False False False False False True True True False False False
3 31 some college white male 1 apartment 45000.0 never married True south owned 0 working - as a paid employee True yes, both democrat other non-christian, please specify: True 40.0 9 True False white other non-christian, please specify high school True 0 False 23.0 23.0 NaN more False False False 9.0 good True True False False False False True True False False False False
4 35 high school white male 2 house 45000.0 never married True south owned 0 working - self-employed True yes, friends democrat other christian False NaN 1 True False NaN NaN less than high school False NaN False NaN NaN NaN NaN False False False NaN NaN False False False False False False True False True False False False

In [58]:
df.to_csv('/gh/data/hcmst/1_cleaned.csv')

Distributions


In [54]:
for c in df.columns:
    print(df[c].value_counts())


56    107
44    104
29    103
43     96
50     96
49     94
55     92
28     91
41     90
54     90
51     86
38     86
39     86
59     84
53     83
58     82
45     78
42     78
48     77
20     76
27     75
40     74
46     73
57     71
36     71
62     68
52     67
47     66
21     65
32     64
     ... 
66     48
69     41
68     40
73     38
70     36
19     33
76     33
71     33
74     30
72     29
78     29
75     25
79     25
77     23
82     22
80     21
83     17
85     13
84     12
81     11
89      6
88      6
86      4
92      3
90      3
91      2
87      2
93      1
94      1
95      1
Name: age, Length: 77, dtype: int64
bachelor's degree or higher    1407
some college                   1210
high school                     987
less than high school           398
Name: education, dtype: int64
white                  3007
hispanic                387
black                   321
other, non-hispanic     287
Name: race, dtype: int64
female    2035
male      1967
Name: sex, dtype: int64
2     1461
1      970
3      581
4      566
5      252
6      102
7       42
8       14
9        7
10       3
11       2
15       1
12       1
Name: household_size, dtype: int64
house        3113
apartment     720
mobile        169
Name: house_type, dtype: int64
67250.0     461
55000.0     422
45000.0     397
92250.0     311
112250.0    300
80000.0     296
37250.0     260
22250.0     219
27250.0     212
32250.0     201
137250.0    162
17250.0     157
200000.0    127
11250.0     106
8750.0      100
162250.0     87
13750.0      85
6250.0       54
2500.0       45
Name: income, dtype: int64
married                1736
never married          1020
living with partner     503
divorced                477
widowed                 204
separated                62
Name: marital_status, dtype: int64
True     3436
False     566
Name: in_metro, dtype: int64
south        1262
west          975
midwest       955
northeast     810
Name: usa_region, dtype: int64
owned    3040
rent      894
free       68
Name: house_payment, dtype: int64
0    3075
1     443
2     331
3     113
4      31
5       7
7       1
6       1
Name: N_child, dtype: int64
working - as a paid employee                    2191
not working - retired                            637
not working - disabled                           347
working - self-employed                          341
not working - other                              286
not working - looking for work                   180
not working - on temporary layoff from a job      20
Name: work, dtype: int64
True     2770
False    1232
Name: has_internet, dtype: int64
yes, friends                                  1372
no                                            1171
yes, both                                     1059
yes, relatives                                 290
i would prefer to not answer this question     100
Name: has_gay_friendsfam, dtype: int64
democrat      2385
republican    1505
other          112
Name: politics, dtype: int64
protestant (e.g., methodist, lutheran, presbyterian, episcopal)    918
catholic                                                           897
none                                                               640
baptist-any denomination                                           523
other christian                                                    482
other non-christian, please specify:                               172
pentecostal                                                        110
jewish                                                             110
mormon                                                              66
buddhist                                                            33
eastern orthodox                                                    15
hindu                                                               13
muslim                                                               8
Name: religion, dtype: int64
True     3009
False     993
Name: in_relationship, dtype: int64
50.0    84
30.0    83
29.0    77
36.0    76
31.0    74
38.0    74
43.0    73
42.0    71
44.0    71
51.0    69
55.0    68
47.0    67
39.0    66
28.0    65
40.0    63
52.0    63
34.0    61
45.0    61
41.0    60
33.0    59
32.0    59
46.0    59
54.0    58
35.0    58
37.0    58
49.0    56
48.0    56
60.0    54
53.0    53
58.0    51
        ..
69.0    26
73.0    23
66.0    23
71.0    23
75.0    21
77.0    20
67.0    19
78.0    19
19.0    18
68.0    18
82.0    18
74.0    17
80.0    12
81.0    11
76.0    10
79.0     8
18.0     7
83.0     7
85.0     6
86.0     4
84.0     4
89.0     3
87.0     3
17.0     2
91.0     2
99.0     2
16.0     2
88.0     1
92.0     1
90.0     1
Name: partner_age, Length: 78, dtype: int64
1        366
0        337
8        184
9        183
10       176
11       173
12       164
2        155
7        155
13       144
14       133
6        121
17       112
15       107
16        90
21        84
18        84
5         72
19        70
20        65
23        54
3         54
24        49
4         47
25        40
27        39
29        38
22        38
26        38
28        30
        ... 
281        1
277        1
4359       1
261        1
251        1
239        1
211        1
18620      1
187        1
171        1
14504      1
167        1
157        1
147        1
143        1
137        1
131        1
129        1
121        1
113        1
109        1
107        1
101        1
99         1
95         1
89         1
87         1
38994      1
73         1
4076       1
Name: N_minutes_survey, Length: 316, dtype: int64
False    3047
True      955
Name: is_lgb, dtype: int64
False    2071
True     1931
Name: is_married, dtype: int64
white       2411
black        242
Hispanic     235
other        116
Name: partner_race, dtype: int64
catholic                                                          679
protestant (e.g. methodist, lutheran, presbyterian, episcopal)    663
none                                                              542
baptist - any denomination                                        422
other christian                                                   372
jewish                                                             83
pentecostal                                                        71
mormon                                                             65
other non-christian, please specify                                56
buddhist                                                           18
muslim                                                             13
eastern orthodox                                                   11
hindu                                                               8
refused                                                             7
Name: partner_religion, dtype: int64
less than high school          1250
bachelor's degree or higher    1047
some college                    968
high school                     737
Name: partner_education, dtype: int64
True     2858
False    1144
Name: USA_raised, dtype: int64
1     1694
0      720
2      459
3+     129
Name: N_marriages, dtype: int64
False    4002
Name: cohabit, dtype: int64
19.0    172
20.0    170
18.0    162
21.0    156
17.0    148
16.0    141
23.0    126
22.0    125
24.0     96
25.0     96
26.0     95
15.0     92
28.0     85
29.0     77
27.0     73
35.0     66
30.0     66
14.0     65
33.0     63
40.0     63
32.0     55
34.0     51
31.0     48
39.0     41
36.0     39
38.0     38
42.0     38
45.0     37
41.0     34
37.0     34
       ... 
60.0      6
57.0      6
11.0      5
8.0       5
61.0      4
62.0      4
66.0      3
73.0      3
67.0      3
63.0      3
0.0       3
69.0      2
71.0      2
65.0      2
2.0       2
68.0      2
64.0      2
7.0       2
78.0      1
74.0      1
79.0      1
85.0      1
70.0      1
77.0      1
83.0      1
81.0      1
76.0      1
3.0       1
1.0       1
75.0      1
Name: age_first_met, Length: 81, dtype: int64
20.0    198
18.0    194
21.0    191
19.0    171
22.0    138
23.0    131
17.0    128
24.0    109
25.0    104
16.0    103
26.0    100
28.0     90
27.0     80
29.0     76
30.0     73
35.0     62
32.0     59
31.0     59
33.0     59
40.0     58
34.0     52
36.0     49
15.0     47
39.0     44
42.0     42
37.0     39
38.0     38
45.0     37
41.0     37
43.0     36
       ... 
57.0     10
59.0      9
60.0      7
13.0      7
67.0      6
61.0      6
58.0      6
65.0      5
63.0      4
66.0      3
69.0      3
74.0      3
70.0      2
62.0      2
81.0      2
64.0      2
12.0      2
71.0      2
79.0      2
68.0      1
4.0       1
76.0      1
0.0       1
73.0      1
75.0      1
77.0      1
85.0      1
83.0      1
78.0      1
87.0      1
Name: age_relationship_begin, Length: 73, dtype: int64
22.0    151
21.0    134
23.0    129
25.0    125
24.0    125
20.0    108
26.0    106
19.0     81
18.0     74
27.0     72
28.0     68
30.0     63
29.0     55
31.0     44
34.0     43
32.0     43
17.0     39
33.0     37
36.0     34
35.0     32
37.0     26
38.0     25
39.0     21
40.0     19
41.0     17
43.0     17
44.0     16
48.0     16
42.0     15
50.0     15
47.0     14
45.0     13
46.0     12
16.0     12
56.0     11
55.0     11
49.0     11
52.0      9
54.0      8
53.0      8
51.0      7
57.0      7
60.0      5
58.0      4
59.0      4
63.0      3
66.0      2
64.0      2
15.0      2
62.0      2
61.0      2
72.0      2
67.0      1
73.0      1
69.0      1
14.0      1
71.0      1
78.0      1
70.0      1
65.0      1
Name: age_married, dtype: int64
more    1332
less    1272
same     375
Name: relative_income, dtype: int64
False    3587
True      415
Name: same_high_school, dtype: int64
False    3638
True      364
Name: same_college, dtype: int64
False    3330
True      672
Name: same_hometown, dtype: int64
1.0     536
2.0     412
0.0     343
3.0     342
4.0     282
5.0     187
6.0     173
7.0     126
9.0      86
8.0      82
10.0     81
11.0     65
13.0     50
12.0     45
14.0     30
16.0     25
15.0     22
17.0     18
18.0     10
20.0      9
19.0      8
26.0      8
23.0      8
24.0      6
27.0      6
22.0      5
21.0      4
30.0      3
32.0      3
31.0      2
41.0      2
25.0      2
28.0      2
35.0      2
38.0      1
29.0      1
69.0      1
37.0      1
70.0      1
45.0      1
Name: age_difference, dtype: int64
excellent    1771
good          911
fair          252
poor           42
very poor      20
refused        13
Name: relationship_quality, dtype: int64
False    3732
True      270
Name: met_online, dtype: int64
False    2982
True     1020
Name: met_friends, dtype: int64
False    3558
True      444
Name: met_family, dtype: int64
False    3558
True      444
Name: met_work, dtype: int64
False    2231
True     1771
Name: relationship_excellent, dtype: int64
False    2532
True     1470
Name: is_not_working, dtype: int64
True     2431
False    1571
Name: has_gay_friends, dtype: int64
False    2653
True     1349
Name: has_gay_family, dtype: int64
True     3011
False     991
Name: religion_is_christian, dtype: int64
False    3362
True      640
Name: religion_is_none, dtype: int64
False    2804
True     1198
Name: partner_religion_is_christian, dtype: int64
False    3460
True      542
Name: partner_religion_is_none, dtype: int64

In [55]:
# Countplot if categorical; distplot if numeric
from pandas.api.types import is_numeric_dtype

plt.figure(figsize=(40,40))
for i, c in enumerate(df.columns):
    plt.subplot(7,7,i+1)
    if is_numeric_dtype(df[c]):
        sns.distplot(df[c].dropna(), kde=False)
    else:
        sns.countplot(y=c, data=df)
plt.savefig('temp.png')


/Users/scott/anaconda/lib/python3.6/site-packages/seaborn/categorical.py:1428: FutureWarning: remove_na is deprecated and is a private function. Do not use.
  stat_data = remove_na(group_data)

In [57]:
sns.barplot(x='income', y='race', data=df)


/Users/scott/anaconda/lib/python3.6/site-packages/seaborn/categorical.py:1428: FutureWarning: remove_na is deprecated and is a private function. Do not use.
  stat_data = remove_na(group_data)
Out[57]:
<matplotlib.axes._subplots.AxesSubplot at 0x120e97278>