Healthcare spending by individuals

This notebook takes a MEPS dataset and computes ...

Work in progress...

Links

This notebook was adapted by Dave Backus from one created by Luke K. Min for use in the NYU Stern course Data Bootcamp. We thank Martin Hackmann for the suggestion and the reference to this report.


In [1]:
import pandas as pd
import numpy as np

In [2]:
file = '../csv/MEPS_H155.csv'
url  = ''
meps = pd.read_csv(file)


C:\Users\dbackus\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:2902: DtypeWarning: Columns (59,62,65,68,504,505,1792,1793,1820,1826) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)

In [4]:
meps.shape


Out[4]:
(5175, 1883)

In [3]:
list(meps)


Out[3]:
['DUID',
 'PID',
 'DUPERSID',
 'PANEL',
 'FAMID31',
 'FAMID42',
 'FAMID53',
 'FAMID12',
 'FAMIDYR',
 'CPSFAMID',
 'FCSZ1231',
 'FCRP1231',
 'RULETR31',
 'RULETR42',
 'RULETR53',
 'RULETR12',
 'RUSIZE31',
 'RUSIZE42',
 'RUSIZE53',
 'RUSIZE12',
 'RUCLAS31',
 'RUCLAS42',
 'RUCLAS53',
 'RUCLAS12',
 'FAMSZE31',
 'FAMSZE42',
 'FAMSZE53',
 'FAMSZE12',
 'FMRS1231',
 'FAMS1231',
 'FAMSZEYR',
 'FAMRFPYR',
 'REGION31',
 'REGION42',
 'REGION53',
 'REGION12',
 'MSA31',
 'MSA42',
 'MSA53',
 'MSA12',
 'REFPRS31',
 'REFPRS42',
 'REFPRS53',
 'REFPRS12',
 'RESP31',
 'RESP42',
 'RESP53',
 'RESP12',
 'PROXY31',
 'PROXY42',
 'PROXY53',
 'PROXY12',
 'INTVLANG',
 'BEGRFD31',
 'BEGRFM31',
 'BEGRFY31',
 'ENDRFD31',
 'ENDRFM31',
 'ENDRFY31',
 'BEGRFD42',
 'BEGRFM42',
 'BEGRFY42',
 'ENDRFD42',
 'ENDRFM42',
 'ENDRFY42',
 'BEGRFD53',
 'BEGRFM53',
 'BEGRFY53',
 'ENDRFD53',
 'ENDRFM53',
 'ENDRFY53',
 'ENDRFD12',
 'ENDRFM12',
 'ENDRFY12',
 'KEYNESS',
 'INSCOP31',
 'INSCOP42',
 'INSCOP53',
 'INSCOP12',
 'INSC1231',
 'INSCOPE',
 'ELGRND31',
 'ELGRND42',
 'ELGRND53',
 'ELGRND12',
 'PSTATS31',
 'PSTATS42',
 'PSTATS53',
 'RURSLT31',
 'RURSLT42',
 'RURSLT53',
 'AGE31X',
 'AGE42X',
 'AGE53X',
 'AGE12X',
 'AGELAST',
 'DOBMM',
 'DOBYY',
 'SEX',
 'RACEVER',
 'RACEV1X',
 'RACEAX',
 'RACEBX',
 'RACEWX',
 'RACETHX',
 'HISPANX',
 'HISPCAT',
 'MARRY31X',
 'MARRY42X',
 'MARRY53X',
 'MARRY12X',
 'SPOUID31',
 'SPOUID42',
 'SPOUID53',
 'SPOUID12',
 'SPOUIN31',
 'SPOUIN42',
 'SPOUIN53',
 'SPOUIN12',
 'EDUCYR',
 'EDUYRDEG',
 'HIDEG',
 'EDRECODE',
 'FTSTU31X',
 'FTSTU42X',
 'FTSTU53X',
 'FTSTU12X',
 'ACTDTY31',
 'ACTDTY42',
 'ACTDTY53',
 'HONRDC31',
 'HONRDC42',
 'HONRDC53',
 'RFREL31X',
 'RFREL42X',
 'RFREL53X',
 'RFREL12X',
 'MOPID31X',
 'MOPID42X',
 'MOPID53X',
 'DAPID31X',
 'DAPID42X',
 'DAPID53X',
 'RTHLTH31',
 'RTHLTH42',
 'RTHLTH53',
 'MNHLTH31',
 'MNHLTH42',
 'MNHLTH53',
 'HIBPDX',
 'HIBPAGED',
 'BPMLDX',
 'CHDDX',
 'CHDAGED',
 'ANGIDX',
 'ANGIAGED',
 'MIDX',
 'MIAGED',
 'OHRTDX',
 'OHRTAGED',
 'STRKDX',
 'STRKAGED',
 'EMPHDX',
 'EMPHAGED',
 'CHBRON31',
 'CHBRON53',
 'CHOLDX',
 'CHOLAGED',
 'CANCERDX',
 'CABLADDR',
 'BLDRAGED',
 'BLDRREMS',
 'CABRAIN',
 'BRAIAGED',
 'BRAIREMS',
 'CABREAST',
 'BRSTAGED',
 'BRSTREMS',
 'CACERVIX',
 'CERVAGED',
 'CERVREMS',
 'CACOLON',
 'COLOAGED',
 'COLOREMS',
 'CALEUKEM',
 'LEUKAGED',
 'LEUKREMS',
 'CALUNG',
 'LUNGAGED',
 'LUNGREMS',
 'CALYMPH',
 'LYMPAGED',
 'LYMPREMS',
 'CAMELANO',
 'MELAAGED',
 'MELAREMS',
 'CAOTHER',
 'OTHRAGED',
 'OTHRREMS',
 'CAPROSTA',
 'PRSTAGED',
 'PRSTREMS',
 'CASKINNM',
 'SKNMAGED',
 'SKNMREMS',
 'CASKINDK',
 'SKDKAGED',
 'SKDKREMS',
 'CATHROAT',
 'THRTAGED',
 'THRTREMS',
 'CATHYROD',
 'THYRAGED',
 'THYRREMS',
 'DIABDX',
 'DIABAGED',
 'JTPAIN31',
 'JTPAIN53',
 'ARTHDX',
 'ARTHTYPE',
 'ARTHAGED',
 'ASTHDX',
 'ASTHAGED',
 'ASSTIL31',
 'ASSTIL53',
 'ASATAK31',
 'ASATAK53',
 'ASTHEP31',
 'ASTHEP53',
 'ASACUT53',
 'ASMRCN53',
 'ASPREV53',
 'ASDALY53',
 'ASPKFL53',
 'ASEVFL53',
 'ASWNFL53',
 'ADHDADDX',
 'ADHDAGED',
 'PREGNT31',
 'PREGNT42',
 'PREGNT53',
 'IADLHP31',
 'IADLHP42',
 'IADLHP53',
 'IADL3M31',
 'IADL3M42',
 'IADL3M53',
 'ADLHLP31',
 'ADLHLP42',
 'ADLHLP53',
 'ADL3MO31',
 'ADL3MO42',
 'ADL3MO53',
 'AIDHLP31',
 'AIDHLP53',
 'WLKLIM31',
 'WLKLIM53',
 'LFTDIF31',
 'LFTDIF53',
 'STPDIF31',
 'STPDIF53',
 'WLKDIF31',
 'WLKDIF53',
 'MILDIF31',
 'MILDIF53',
 'STNDIF31',
 'STNDIF53',
 'BENDIF31',
 'BENDIF53',
 'RCHDIF31',
 'RCHDIF53',
 'FNGRDF31',
 'FNGRDF53',
 'WLK3MO31',
 'WLK3MO53',
 'ACTLIM31',
 'ACTLIM53',
 'WRKLIM31',
 'WRKLIM53',
 'HSELIM31',
 'HSELIM53',
 'SCHLIM31',
 'SCHLIM53',
 'UNABLE31',
 'UNABLE53',
 'SOCLIM31',
 'SOCLIM53',
 'COGLIM31',
 'COGLIM53',
 'WRGLAS42',
 'SEEDIF42',
 'BLIND42',
 'READNW42',
 'RECPEP42',
 'VISION42',
 'HEARAD42',
 'HEARDI42',
 'DEAF42',
 'HEARMO42',
 'HEARSM42',
 'HEARNG42',
 'ANYLIM12',
 'LSHLTH42',
 'NEVILL42',
 'SICEAS42',
 'HLTHLF42',
 'WRHLTH42',
 'CHPMED42',
 'CHPMHB42',
 'CHPMCN42',
 'CHSERV42',
 'CHSRHB42',
 'CHSRCN42',
 'CHLIMI42',
 'CHLIHB42',
 'CHLICO42',
 'CHTHER42',
 'CHTHHB42',
 'CHTHCO42',
 'CHCOUN42',
 'CHEMPB42',
 'CSHCN42',
 'MOMPRO42',
 'DADPRO42',
 'UNHAP42',
 'SCHLBH42',
 'HAVFUN42',
 'ADUPRO42',
 'NERVAF42',
 'SIBPRO42',
 'KIDPRO42',
 'SPRPRO42',
 'SCHPRO42',
 'HOMEBH42',
 'TRBLE42',
 'CHILCR42',
 'CHILWW42',
 'CHRTCR42',
 'CHRTWW42',
 'CHAPPT42',
 'CHNDCR42',
 'CHENEC42',
 'CHLIST42',
 'CHEXPL42',
 'CHRESP42',
 'CHPRTM42',
 'CHHECR42',
 'CHSPEC42',
 'CHEYRE42',
 'MESHGT42',
 'WHNHGT42',
 'MESWGT42',
 'WHNWGT42',
 'CHBMIX42',
 'MESVIS42',
 'MESBPR42',
 'WHNBPR42',
 'DENTAL42',
 'WHNDEN42',
 'EATHLT42',
 'WHNEAT42',
 'PHYSCL42',
 'WHNPHY42',
 'SAFEST42',
 'WHNSAF42',
 'BOOST42',
 'WHNBST42',
 'LAPBLT42',
 'WHNLAP42',
 'HELMET42',
 'WHNHEL42',
 'NOSMOK42',
 'WHNSMK42',
 'TIMALN42',
 'DENTCK53',
 'BPCHEK53',
 'BPMONT53',
 'CHOLCK53',
 'CHECK53',
 'NOFAT53',
 'EXRCIS53',
 'FLUSHT53',
 'ASPRIN53',
 'NOASPR53',
 'STOMCH53',
 'LSTETH53',
 'PSA53',
 'HYSTER53',
 'PAPSMR53',
 'BRSTEX53',
 'MAMOGR53',
 'BSTST53',
 'BSTSRE53',
 'CLNTST53',
 'CLNTRE53',
 'SGMTST53',
 'SGMTRE53',
 'PHYEXE53',
 'BMINDX53',
 'SEATBE53',
 'SAQELIG',
 'ADPRX42',
 'ADILCR42',
 'ADILWW42',
 'ADRTCR42',
 'ADRTWW42',
 'ADAPPT42',
 'ADNDCR42',
 'ADEGMC42',
 'ADLIST42',
 'ADEXPL42',
 'ADRESP42',
 'ADPRTM42',
 'ADINST42',
 'ADEZUN42',
 'ADTLHW42',
 'ADFFRM42',
 'ADFHLP42',
 'ADHECR42',
 'ADSMOK42',
 'ADNSMK42',
 'ADDRBP42',
 'ADSPEC42',
 'ADSPRF42',
 'ADGENH42',
 'ADDAYA42',
 'ADCLIM42',
 'ADPALS42',
 'ADPWLM42',
 'ADMALS42',
 'ADMWLM42',
 'ADPAIN42',
 'ADCAPE42',
 'ADNRGY42',
 'ADDOWN42',
 'ADSOCA42',
 'PCS42',
 'MCS42',
 'SFFLAG42',
 'ADNERV42',
 'ADHOPE42',
 'ADREST42',
 'ADSAD42',
 'ADEFRT42',
 'ADWRTH42',
 'K6SUM42',
 'ADINTR42',
 'ADDPRS42',
 'PHQ242',
 'ADINSA42',
 'ADINSB42',
 'ADRISK42',
 'ADOVER42',
 'ADCMPM42',
 'ADCMPD42',
 'ADCMPY42',
 'ADLANG42',
 'DSDIA53',
 'DSA1C53',
 'DSFT1353',
 'DSFT1253',
 'DSFT1153',
 'DSFB1153',
 'DSFTNV53',
 'DSEY1353',
 'DSEY1253',
 'DSEY1153',
 'DSEB1153',
 'DSEYNV53',
 'DSCH1353',
 'DSCH1253',
 'DSCH1153',
 'DSCB1153',
 'DSCHNV53',
 'DSFL1353',
 'DSFL1253',
 'DSFL1153',
 'DSVB1153',
 'DSFLNV53',
 'DSKIDN53',
 'DSEYPR53',
 'DSDIET53',
 'DSMED53',
 'DSINSU53',
 'DSCPCP53',
 'DSCNPC53',
 'DSCPHN53',
 'DSCINT53',
 'DSCGRP53',
 'DSCONF53',
 'DSPRX53',
 'DDNWRK31',
 'DDNWRK42',
 'DDNWRK53',
 'WKINBD31',
 'WKINBD42',
 'WKINBD53',
 'DDNSCL31',
 'DDNSCL42',
 'DDNSCL53',
 'SCLNBD31',
 'SCLNBD42',
 'SCLNBD53',
 'DDBDYS31',
 'DDBDYS42',
 'DDBDYS53',
 'OTHDYS31',
 'OTHDYS42',
 'OTHDYS53',
 'OTHNDD31',
 'OTHNDD42',
 'OTHNDD53',
 'ACCELI42',
 'LANGHM42',
 'ENGCMF42',
 'ENGSPK42',
 'USBORN42',
 'USLIVE42',
 'HAVEUS42',
 'YNOUSC42',
 'NOREAS42',
 'SELDSI42',
 'NEWARE42',
 'DKWHRU42',
 'USCNOT42',
 'PERSLA42',
 'DIFFPL42',
 'INSRPL42',
 'MYSELF42',
 'CARECO42',
 'NOHINS42',
 'OTHINS42',
 'JOBRSN42',
 'NEWDOC42',
 'DOCELS42',
 'NOLIKE42',
 'HEALTH42',
 'KNOWDR42',
 'ONJOB42',
 'NOGODR42',
 'TRANS42',
 'CLINIC42',
 'OTHREA42',
 'PROVTY42',
 'PLCTYP42',
 'GOTOUS42',
 'TMTKUS42',
 'DFTOUS42',
 'TYPEPE42',
 'LOCATN42',
 'HSPLAP42',
 'WHITPR42',
 'BLCKPR42',
 'ASIANP42',
 'NATAMP42',
 'PACISP42',
 'OTHRCP42',
 'GENDRP42',
 'MINORP42',
 'PREVEN42',
 'REFFRL42',
 'ONGONG42',
 'PHNREG42',
 'OFFHOU42',
 'AFTHOU42',
 'TREATM42',
 'RESPCT42',
 'DECIDE42',
 'EXPLOP42',
 'LANGPR42',
 'MDUNAB42',
 'MDUNRS42',
 'MDUNPR42',
 'MDDLAY42',
 'MDDLRS42',
 'MDDLPR42',
 'DNUNAB42',
 'DNUNRS42',
 'DNUNPR42',
 'DNDLAY42',
 'DNDLRS42',
 'DNDLPR42',
 'PMUNAB42',
 'PMUNRS42',
 'PMUNPR42',
 'PMDLAY42',
 'PMDLRS42',
 'PMDLPR42',
 'EMPST31',
 'EMPST42',
 'EMPST53',
 'RNDFLG31',
 'MORJOB31',
 'MORJOB42',
 'MORJOB53',
 'EVRWRK',
 'HRWG31X',
 'HRWG42X',
 'HRWG53X',
 'HRWGIM31',
 'HRWGIM42',
 'HRWGIM53',
 'HRHOW31',
 'HRHOW42',
 'HRHOW53',
 'DIFFWG31',
 'DIFFWG42',
 'DIFFWG53',
 'NHRWG31',
 'NHRWG42',
 'NHRWG53',
 'HOUR31',
 'HOUR42',
 'HOUR53',
 'TEMPJB31',
 'TEMPJB42',
 'TEMPJB53',
 'SSNLJB31',
 'SSNLJB42',
 'SSNLJB53',
 'SELFCM31',
 'SELFCM42',
 'SELFCM53',
 'DISVW31X',
 'DISVW42X',
 'DISVW53X',
 'CHOIC31',
 'CHOIC42',
 'CHOIC53',
 'INDCAT31',
 'INDCAT42',
 'INDCAT53',
 'NUMEMP31',
 'NUMEMP42',
 'NUMEMP53',
 'MORE31',
 'MORE42',
 'MORE53',
 'UNION31',
 'UNION42',
 'UNION53',
 'NWK31',
 'NWK42',
 'NWK53',
 'CHGJ3142',
 'CHGJ4253',
 'YCHJ3142',
 'YCHJ4253',
 'STJBMM31',
 'STJBDD31',
 'STJBYY31',
 'STJBMM42',
 'STJBDD42',
 'STJBYY42',
 'STJBMM53',
 'STJBDD53',
 'STJBYY53',
 'EVRETIRE',
 'OCCCAT31',
 'OCCCAT42',
 'OCCCAT53',
 'PAYVAC31',
 'PAYVAC42',
 'PAYVAC53',
 'SICPAY31',
 'SICPAY42',
 'SICPAY53',
 'PAYDR31',
 'PAYDR42',
 'PAYDR53',
 'RETPLN31',
 'RETPLN42',
 'RETPLN53',
 'BSNTY31',
 'BSNTY42',
 'BSNTY53',
 'JOBORG31',
 'JOBORG42',
 'JOBORG53',
 'HELD31X',
 'HELD42X',
 'HELD53X',
 'OFFER31X',
 'OFFER42X',
 'OFFER53X',
 'OFREMP31',
 'OFREMP42',
 'OFREMP53',
 'YNOINS31',
 'YNOINS42',
 'YNOINS53',
 'SSIDIS12',
 'AFDC12',
 'FILEDR12',
 'WILFIL12',
 'FLSTAT12',
 'FILER12',
 'JTINRU12',
 'JNTPID12',
 'CLMDEP12',
 'DEPDNT12',
 'DPINRU12',
 'DPOTSD12',
 'TAXFRM12',
 'DEDUCT12',
 'TOTDED12',
 'CLMHIP12',
 'EICRDT12',
 'FOODST12',
 'FOODMN12',
 'FOODVL12',
 'TTLP12X',
 'FAMINC12',
 'POVCAT12',
 'POVLEV12',
 'WAGEP12X',
 'WAGIMP12',
 'BUSNP12X',
 'BUSIMP12',
 'UNEMP12X',
 'UNEIMP12',
 'WCMPP12X',
 'WCPIMP12',
 'INTRP12X',
 'INTIMP12',
 'DIVDP12X',
 'DIVIMP12',
 'SALEP12X',
 'SALIMP12',
 'PENSP12X',
 'PENIMP12',
 'SSECP12X',
 'SSCIMP12',
 'TRSTP12X',
 'TRTIMP12',
 'VETSP12X',
 'VETIMP12',
 'IRASP12X',
 'IRAIMP12',
 'REFDP12X',
 'REFIMP12',
 'ALIMP12X',
 'ALIIMP12',
 'CHLDP12X',
 'CHLIMP12',
 'CASHP12X',
 'CSHIMP12',
 'SSIP12X',
 'SSIIMP12',
 'PUBP12X',
 'PUBIMP12',
 'OTHRP12X',
 'OTHIMP12',
 'HIEUIDX',
 'TRIJA12X',
 'TRIFE12X',
 'TRIMA12X',
 'TRIAP12X',
 'TRIMY12X',
 'TRIJU12X',
 'TRIJL12X',
 'TRIAU12X',
 'TRISE12X',
 'TRIOC12X',
 'TRINO12X',
 'TRIDE12X',
 'MCRJA12',
 'MCRFE12',
 'MCRMA12',
 'MCRAP12',
 'MCRMY12',
 'MCRJU12',
 'MCRJL12',
 'MCRAU12',
 'MCRSE12',
 'MCROC12',
 'MCRNO12',
 'MCRDE12',
 'MCRJA12X',
 'MCRFE12X',
 'MCRMA12X',
 'MCRAP12X',
 'MCRMY12X',
 'MCRJU12X',
 'MCRJL12X',
 'MCRAU12X',
 'MCRSE12X',
 'MCROC12X',
 'MCRNO12X',
 'MCRDE12X',
 'MCDJA12',
 'MCDFE12',
 'MCDMA12',
 'MCDAP12',
 'MCDMY12',
 'MCDJU12',
 'MCDJL12',
 'MCDAU12',
 'MCDSE12',
 'MCDOC12',
 'MCDNO12',
 'MCDDE12',
 'MCDJA12X',
 'MCDFE12X',
 'MCDMA12X',
 'MCDAP12X',
 'MCDMY12X',
 'MCDJU12X',
 'MCDJL12X',
 'MCDAU12X',
 'MCDSE12X',
 'MCDOC12X',
 'MCDNO12X',
 'MCDDE12X',
 'OPAJA12',
 'OPAFE12',
 'OPAMA12',
 'OPAAP12',
 'OPAMY12',
 'OPAJU12',
 'OPAJL12',
 'OPAAU12',
 'OPASE12',
 'OPAOC12',
 'OPANO12',
 'OPADE12',
 'OPBJA12',
 'OPBFE12',
 'OPBMA12',
 'OPBAP12',
 'OPBMY12',
 'OPBJU12',
 'OPBJL12',
 'OPBAU12',
 'OPBSE12',
 'OPBOC12',
 'OPBNO12',
 'OPBDE12',
 'STAJA12',
 'STAFE12',
 'STAMA12',
 'STAAP12',
 'STAMY12',
 'STAJU12',
 'STAJL12',
 'STAAU12',
 'STASE12',
 'STAOC12',
 'STANO12',
 'STADE12',
 'PUBJA12X',
 'PUBFE12X',
 'PUBMA12X',
 'PUBAP12X',
 'PUBMY12X',
 'PUBJU12X',
 'PUBJL12X',
 'PUBAU12X',
 'PUBSE12X',
 'PUBOC12X',
 'PUBNO12X',
 'PUBDE12X',
 'PEGJA12',
 'PEGFE12',
 'PEGMA12',
 'PEGAP12',
 'PEGMY12',
 'PEGJU12',
 'PEGJL12',
 'PEGAU12',
 'PEGSE12',
 'PEGOC12',
 'PEGNO12',
 'PEGDE12',
 'PDKJA12',
 'PDKFE12',
 'PDKMA12',
 'PDKAP12',
 'PDKMY12',
 'PDKJU12',
 'PDKJL12',
 'PDKAU12',
 'PDKSE12',
 'PDKOC12',
 'PDKNO12',
 'PDKDE12',
 'PNGJA12',
 'PNGFE12',
 'PNGMA12',
 'PNGAP12',
 'PNGMY12',
 'PNGJU12',
 'PNGJL12',
 'PNGAU12',
 'PNGSE12',
 'PNGOC12',
 'PNGNO12',
 'PNGDE12',
 'POGJA12',
 'POGFE12',
 'POGMA12',
 'POGAP12',
 'POGMY12',
 'POGJU12',
 'POGJL12',
 'POGAU12',
 'POGSE12',
 'POGOC12',
 'POGNO12',
 'POGDE12',
 'PRSJA12',
 'PRSFE12',
 'PRSMA12',
 'PRSAP12',
 'PRSMY12',
 'PRSJU12',
 'PRSJL12',
 'PRSAU12',
 'PRSSE12',
 'PRSOC12',
 'PRSNO12',
 'PRSDE12',
 'POUJA12',
 'POUFE12',
 'POUMA12',
 'POUAP12',
 'POUMY12',
 'POUJU12',
 'POUJL12',
 'POUAU12',
 'POUSE12',
 'POUOC12',
 'POUNO12',
 'POUDE12',
 'PRIJA12',
 'PRIFE12',
 'PRIMA12',
 'PRIAP12',
 'PRIMY12',
 'PRIJU12',
 'PRIJL12',
 'PRIAU12',
 'PRISE12',
 'PRIOC12',
 'PRINO12',
 'PRIDE12',
 'HPEJA12',
 'HPEFE12',
 'HPEMA12',
 'HPEAP12',
 'HPEMY12',
 'HPEJU12',
 'HPEJL12',
 'HPEAU12',
 'HPESE12',
 'HPEOC12',
 'HPENO12',
 'HPEDE12',
 'HPDJA12',
 'HPDFE12',
 'HPDMA12',
 'HPDAP12',
 'HPDMY12',
 'HPDJU12',
 'HPDJL12',
 'HPDAU12',
 'HPDSE12',
 'HPDOC12',
 'HPDNO12',
 'HPDDE12',
 'HPNJA12',
 'HPNFE12',
 'HPNMA12',
 'HPNAP12',
 'HPNMY12',
 'HPNJU12',
 'HPNJL12',
 'HPNAU12',
 'HPNSE12',
 'HPNOC12',
 'HPNNO12',
 'HPNDE12',
 'HPOJA12',
 'HPOFE12',
 'HPOMA12',
 'HPOAP12',
 'HPOMY12',
 'HPOJU12',
 'HPOJL12',
 'HPOAU12',
 'HPOSE12',
 'HPOOC12',
 'HPONO12',
 'HPODE12',
 'HPSJA12',
 'HPSFE12',
 'HPSMA12',
 'HPSAP12',
 'HPSMY12',
 'HPSJU12',
 ...]

In [2]:
#Take a look at how the data is formatted.
MEPS = pd.DataFrame.from_csv('DATA/H155.CSV', index_col=None)
MEPS.head()


/Applications/anaconda/lib/python3.4/site-packages/pandas/io/parsers.py:1170: DtypeWarning: Columns (59,62,65,68,504,505,1792,1793,1820,1826) have mixed types. Specify dtype option on import or set low_memory=False.
  data = self._reader.read(nrows)
Out[2]:
DUID PID DUPERSID PANEL FAMID31 FAMID42 FAMID53 FAMID12 FAMIDYR CPSFAMID ... RXOSR12 RXPTR12 RXOTH12 PERWT12F FAMWT12F FAMWT12C SAQWT12F DIABW12F VARSTR VARPSU
0 20004 101 20004101 17 PANEL 17 A A A A A A ... 0 0 0 5278.8731 5507.4827 5507.4827 6821.5242 0 1109 1
1 20004 102 20004102 17 PANEL 17 A A A A A A ... 0 0 0 5487.2298 5507.4827 5507.4827 6842.4318 0 1109 1
2 20004 103 20004103 17 PANEL 17 A A A A A A ... 0 0 0 4393.8235 5507.4827 5507.4827 0.0000 0 1109 1
3 20005 101 20005101 17 PANEL 17 A A A A A A ... 0 0 53 4218.8032 3566.9810 3566.9810 4164.1973 0 1109 2
4 20005 102 20005102 17 PANEL 17 A A A A A A ... 0 0 0 5666.9517 3566.9810 3566.9810 5537.4505 0 1109 2

5 rows × 1883 columns


In [ ]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
%matplotlib inline

In [3]:
#What columns are there? Make sure to reference the crosswalk.
list(MEPS.columns.values)


Out[3]:
['DUID',
 'PID',
 'DUPERSID',
 'PANEL',
 'FAMID31',
 'FAMID42',
 'FAMID53',
 'FAMID12',
 'FAMIDYR',
 'CPSFAMID',
 'FCSZ1231',
 'FCRP1231',
 'RULETR31',
 'RULETR42',
 'RULETR53',
 'RULETR12',
 'RUSIZE31',
 'RUSIZE42',
 'RUSIZE53',
 'RUSIZE12',
 'RUCLAS31',
 'RUCLAS42',
 'RUCLAS53',
 'RUCLAS12',
 'FAMSZE31',
 'FAMSZE42',
 'FAMSZE53',
 'FAMSZE12',
 'FMRS1231',
 'FAMS1231',
 'FAMSZEYR',
 'FAMRFPYR',
 'REGION31',
 'REGION42',
 'REGION53',
 'REGION12',
 'MSA31',
 'MSA42',
 'MSA53',
 'MSA12',
 'REFPRS31',
 'REFPRS42',
 'REFPRS53',
 'REFPRS12',
 'RESP31',
 'RESP42',
 'RESP53',
 'RESP12',
 'PROXY31',
 'PROXY42',
 'PROXY53',
 'PROXY12',
 'INTVLANG',
 'BEGRFD31',
 'BEGRFM31',
 'BEGRFY31',
 'ENDRFD31',
 'ENDRFM31',
 'ENDRFY31',
 'BEGRFD42',
 'BEGRFM42',
 'BEGRFY42',
 'ENDRFD42',
 'ENDRFM42',
 'ENDRFY42',
 'BEGRFD53',
 'BEGRFM53',
 'BEGRFY53',
 'ENDRFD53',
 'ENDRFM53',
 'ENDRFY53',
 'ENDRFD12',
 'ENDRFM12',
 'ENDRFY12',
 'KEYNESS',
 'INSCOP31',
 'INSCOP42',
 'INSCOP53',
 'INSCOP12',
 'INSC1231',
 'INSCOPE',
 'ELGRND31',
 'ELGRND42',
 'ELGRND53',
 'ELGRND12',
 'PSTATS31',
 'PSTATS42',
 'PSTATS53',
 'RURSLT31',
 'RURSLT42',
 'RURSLT53',
 'AGE31X',
 'AGE42X',
 'AGE53X',
 'AGE12X',
 'AGELAST',
 'DOBMM',
 'DOBYY',
 'SEX',
 'RACEVER',
 'RACEV1X',
 'RACEAX',
 'RACEBX',
 'RACEWX',
 'RACETHX',
 'HISPANX',
 'HISPCAT',
 'MARRY31X',
 'MARRY42X',
 'MARRY53X',
 'MARRY12X',
 'SPOUID31',
 'SPOUID42',
 'SPOUID53',
 'SPOUID12',
 'SPOUIN31',
 'SPOUIN42',
 'SPOUIN53',
 'SPOUIN12',
 'EDUCYR',
 'EDUYRDEG',
 'HIDEG',
 'EDRECODE',
 'FTSTU31X',
 'FTSTU42X',
 'FTSTU53X',
 'FTSTU12X',
 'ACTDTY31',
 'ACTDTY42',
 'ACTDTY53',
 'HONRDC31',
 'HONRDC42',
 'HONRDC53',
 'RFREL31X',
 'RFREL42X',
 'RFREL53X',
 'RFREL12X',
 'MOPID31X',
 'MOPID42X',
 'MOPID53X',
 'DAPID31X',
 'DAPID42X',
 'DAPID53X',
 'RTHLTH31',
 'RTHLTH42',
 'RTHLTH53',
 'MNHLTH31',
 'MNHLTH42',
 'MNHLTH53',
 'HIBPDX',
 'HIBPAGED',
 'BPMLDX',
 'CHDDX',
 'CHDAGED',
 'ANGIDX',
 'ANGIAGED',
 'MIDX',
 'MIAGED',
 'OHRTDX',
 'OHRTAGED',
 'STRKDX',
 'STRKAGED',
 'EMPHDX',
 'EMPHAGED',
 'CHBRON31',
 'CHBRON53',
 'CHOLDX',
 'CHOLAGED',
 'CANCERDX',
 'CABLADDR',
 'BLDRAGED',
 'BLDRREMS',
 'CABRAIN',
 'BRAIAGED',
 'BRAIREMS',
 'CABREAST',
 'BRSTAGED',
 'BRSTREMS',
 'CACERVIX',
 'CERVAGED',
 'CERVREMS',
 'CACOLON',
 'COLOAGED',
 'COLOREMS',
 'CALEUKEM',
 'LEUKAGED',
 'LEUKREMS',
 'CALUNG',
 'LUNGAGED',
 'LUNGREMS',
 'CALYMPH',
 'LYMPAGED',
 'LYMPREMS',
 'CAMELANO',
 'MELAAGED',
 'MELAREMS',
 'CAOTHER',
 'OTHRAGED',
 'OTHRREMS',
 'CAPROSTA',
 'PRSTAGED',
 'PRSTREMS',
 'CASKINNM',
 'SKNMAGED',
 'SKNMREMS',
 'CASKINDK',
 'SKDKAGED',
 'SKDKREMS',
 'CATHROAT',
 'THRTAGED',
 'THRTREMS',
 'CATHYROD',
 'THYRAGED',
 'THYRREMS',
 'DIABDX',
 'DIABAGED',
 'JTPAIN31',
 'JTPAIN53',
 'ARTHDX',
 'ARTHTYPE',
 'ARTHAGED',
 'ASTHDX',
 'ASTHAGED',
 'ASSTIL31',
 'ASSTIL53',
 'ASATAK31',
 'ASATAK53',
 'ASTHEP31',
 'ASTHEP53',
 'ASACUT53',
 'ASMRCN53',
 'ASPREV53',
 'ASDALY53',
 'ASPKFL53',
 'ASEVFL53',
 'ASWNFL53',
 'ADHDADDX',
 'ADHDAGED',
 'PREGNT31',
 'PREGNT42',
 'PREGNT53',
 'IADLHP31',
 'IADLHP42',
 'IADLHP53',
 'IADL3M31',
 'IADL3M42',
 'IADL3M53',
 'ADLHLP31',
 'ADLHLP42',
 'ADLHLP53',
 'ADL3MO31',
 'ADL3MO42',
 'ADL3MO53',
 'AIDHLP31',
 'AIDHLP53',
 'WLKLIM31',
 'WLKLIM53',
 'LFTDIF31',
 'LFTDIF53',
 'STPDIF31',
 'STPDIF53',
 'WLKDIF31',
 'WLKDIF53',
 'MILDIF31',
 'MILDIF53',
 'STNDIF31',
 'STNDIF53',
 'BENDIF31',
 'BENDIF53',
 'RCHDIF31',
 'RCHDIF53',
 'FNGRDF31',
 'FNGRDF53',
 'WLK3MO31',
 'WLK3MO53',
 'ACTLIM31',
 'ACTLIM53',
 'WRKLIM31',
 'WRKLIM53',
 'HSELIM31',
 'HSELIM53',
 'SCHLIM31',
 'SCHLIM53',
 'UNABLE31',
 'UNABLE53',
 'SOCLIM31',
 'SOCLIM53',
 'COGLIM31',
 'COGLIM53',
 'WRGLAS42',
 'SEEDIF42',
 'BLIND42',
 'READNW42',
 'RECPEP42',
 'VISION42',
 'HEARAD42',
 'HEARDI42',
 'DEAF42',
 'HEARMO42',
 'HEARSM42',
 'HEARNG42',
 'ANYLIM12',
 'LSHLTH42',
 'NEVILL42',
 'SICEAS42',
 'HLTHLF42',
 'WRHLTH42',
 'CHPMED42',
 'CHPMHB42',
 'CHPMCN42',
 'CHSERV42',
 'CHSRHB42',
 'CHSRCN42',
 'CHLIMI42',
 'CHLIHB42',
 'CHLICO42',
 'CHTHER42',
 'CHTHHB42',
 'CHTHCO42',
 'CHCOUN42',
 'CHEMPB42',
 'CSHCN42',
 'MOMPRO42',
 'DADPRO42',
 'UNHAP42',
 'SCHLBH42',
 'HAVFUN42',
 'ADUPRO42',
 'NERVAF42',
 'SIBPRO42',
 'KIDPRO42',
 'SPRPRO42',
 'SCHPRO42',
 'HOMEBH42',
 'TRBLE42',
 'CHILCR42',
 'CHILWW42',
 'CHRTCR42',
 'CHRTWW42',
 'CHAPPT42',
 'CHNDCR42',
 'CHENEC42',
 'CHLIST42',
 'CHEXPL42',
 'CHRESP42',
 'CHPRTM42',
 'CHHECR42',
 'CHSPEC42',
 'CHEYRE42',
 'MESHGT42',
 'WHNHGT42',
 'MESWGT42',
 'WHNWGT42',
 'CHBMIX42',
 'MESVIS42',
 'MESBPR42',
 'WHNBPR42',
 'DENTAL42',
 'WHNDEN42',
 'EATHLT42',
 'WHNEAT42',
 'PHYSCL42',
 'WHNPHY42',
 'SAFEST42',
 'WHNSAF42',
 'BOOST42',
 'WHNBST42',
 'LAPBLT42',
 'WHNLAP42',
 'HELMET42',
 'WHNHEL42',
 'NOSMOK42',
 'WHNSMK42',
 'TIMALN42',
 'DENTCK53',
 'BPCHEK53',
 'BPMONT53',
 'CHOLCK53',
 'CHECK53',
 'NOFAT53',
 'EXRCIS53',
 'FLUSHT53',
 'ASPRIN53',
 'NOASPR53',
 'STOMCH53',
 'LSTETH53',
 'PSA53',
 'HYSTER53',
 'PAPSMR53',
 'BRSTEX53',
 'MAMOGR53',
 'BSTST53',
 'BSTSRE53',
 'CLNTST53',
 'CLNTRE53',
 'SGMTST53',
 'SGMTRE53',
 'PHYEXE53',
 'BMINDX53',
 'SEATBE53',
 'SAQELIG',
 'ADPRX42',
 'ADILCR42',
 'ADILWW42',
 'ADRTCR42',
 'ADRTWW42',
 'ADAPPT42',
 'ADNDCR42',
 'ADEGMC42',
 'ADLIST42',
 'ADEXPL42',
 'ADRESP42',
 'ADPRTM42',
 'ADINST42',
 'ADEZUN42',
 'ADTLHW42',
 'ADFFRM42',
 'ADFHLP42',
 'ADHECR42',
 'ADSMOK42',
 'ADNSMK42',
 'ADDRBP42',
 'ADSPEC42',
 'ADSPRF42',
 'ADGENH42',
 'ADDAYA42',
 'ADCLIM42',
 'ADPALS42',
 'ADPWLM42',
 'ADMALS42',
 'ADMWLM42',
 'ADPAIN42',
 'ADCAPE42',
 'ADNRGY42',
 'ADDOWN42',
 'ADSOCA42',
 'PCS42',
 'MCS42',
 'SFFLAG42',
 'ADNERV42',
 'ADHOPE42',
 'ADREST42',
 'ADSAD42',
 'ADEFRT42',
 'ADWRTH42',
 'K6SUM42',
 'ADINTR42',
 'ADDPRS42',
 'PHQ242',
 'ADINSA42',
 'ADINSB42',
 'ADRISK42',
 'ADOVER42',
 'ADCMPM42',
 'ADCMPD42',
 'ADCMPY42',
 'ADLANG42',
 'DSDIA53',
 'DSA1C53',
 'DSFT1353',
 'DSFT1253',
 'DSFT1153',
 'DSFB1153',
 'DSFTNV53',
 'DSEY1353',
 'DSEY1253',
 'DSEY1153',
 'DSEB1153',
 'DSEYNV53',
 'DSCH1353',
 'DSCH1253',
 'DSCH1153',
 'DSCB1153',
 'DSCHNV53',
 'DSFL1353',
 'DSFL1253',
 'DSFL1153',
 'DSVB1153',
 'DSFLNV53',
 'DSKIDN53',
 'DSEYPR53',
 'DSDIET53',
 'DSMED53',
 'DSINSU53',
 'DSCPCP53',
 'DSCNPC53',
 'DSCPHN53',
 'DSCINT53',
 'DSCGRP53',
 'DSCONF53',
 'DSPRX53',
 'DDNWRK31',
 'DDNWRK42',
 'DDNWRK53',
 'WKINBD31',
 'WKINBD42',
 'WKINBD53',
 'DDNSCL31',
 'DDNSCL42',
 'DDNSCL53',
 'SCLNBD31',
 'SCLNBD42',
 'SCLNBD53',
 'DDBDYS31',
 'DDBDYS42',
 'DDBDYS53',
 'OTHDYS31',
 'OTHDYS42',
 'OTHDYS53',
 'OTHNDD31',
 'OTHNDD42',
 'OTHNDD53',
 'ACCELI42',
 'LANGHM42',
 'ENGCMF42',
 'ENGSPK42',
 'USBORN42',
 'USLIVE42',
 'HAVEUS42',
 'YNOUSC42',
 'NOREAS42',
 'SELDSI42',
 'NEWARE42',
 'DKWHRU42',
 'USCNOT42',
 'PERSLA42',
 'DIFFPL42',
 'INSRPL42',
 'MYSELF42',
 'CARECO42',
 'NOHINS42',
 'OTHINS42',
 'JOBRSN42',
 'NEWDOC42',
 'DOCELS42',
 'NOLIKE42',
 'HEALTH42',
 'KNOWDR42',
 'ONJOB42',
 'NOGODR42',
 'TRANS42',
 'CLINIC42',
 'OTHREA42',
 'PROVTY42',
 'PLCTYP42',
 'GOTOUS42',
 'TMTKUS42',
 'DFTOUS42',
 'TYPEPE42',
 'LOCATN42',
 'HSPLAP42',
 'WHITPR42',
 'BLCKPR42',
 'ASIANP42',
 'NATAMP42',
 'PACISP42',
 'OTHRCP42',
 'GENDRP42',
 'MINORP42',
 'PREVEN42',
 'REFFRL42',
 'ONGONG42',
 'PHNREG42',
 'OFFHOU42',
 'AFTHOU42',
 'TREATM42',
 'RESPCT42',
 'DECIDE42',
 'EXPLOP42',
 'LANGPR42',
 'MDUNAB42',
 'MDUNRS42',
 'MDUNPR42',
 'MDDLAY42',
 'MDDLRS42',
 'MDDLPR42',
 'DNUNAB42',
 'DNUNRS42',
 'DNUNPR42',
 'DNDLAY42',
 'DNDLRS42',
 'DNDLPR42',
 'PMUNAB42',
 'PMUNRS42',
 'PMUNPR42',
 'PMDLAY42',
 'PMDLRS42',
 'PMDLPR42',
 'EMPST31',
 'EMPST42',
 'EMPST53',
 'RNDFLG31',
 'MORJOB31',
 'MORJOB42',
 'MORJOB53',
 'EVRWRK',
 'HRWG31X',
 'HRWG42X',
 'HRWG53X',
 'HRWGIM31',
 'HRWGIM42',
 'HRWGIM53',
 'HRHOW31',
 'HRHOW42',
 'HRHOW53',
 'DIFFWG31',
 'DIFFWG42',
 'DIFFWG53',
 'NHRWG31',
 'NHRWG42',
 'NHRWG53',
 'HOUR31',
 'HOUR42',
 'HOUR53',
 'TEMPJB31',
 'TEMPJB42',
 'TEMPJB53',
 'SSNLJB31',
 'SSNLJB42',
 'SSNLJB53',
 'SELFCM31',
 'SELFCM42',
 'SELFCM53',
 'DISVW31X',
 'DISVW42X',
 'DISVW53X',
 'CHOIC31',
 'CHOIC42',
 'CHOIC53',
 'INDCAT31',
 'INDCAT42',
 'INDCAT53',
 'NUMEMP31',
 'NUMEMP42',
 'NUMEMP53',
 'MORE31',
 'MORE42',
 'MORE53',
 'UNION31',
 'UNION42',
 'UNION53',
 'NWK31',
 'NWK42',
 'NWK53',
 'CHGJ3142',
 'CHGJ4253',
 'YCHJ3142',
 'YCHJ4253',
 'STJBMM31',
 'STJBDD31',
 'STJBYY31',
 'STJBMM42',
 'STJBDD42',
 'STJBYY42',
 'STJBMM53',
 'STJBDD53',
 'STJBYY53',
 'EVRETIRE',
 'OCCCAT31',
 'OCCCAT42',
 'OCCCAT53',
 'PAYVAC31',
 'PAYVAC42',
 'PAYVAC53',
 'SICPAY31',
 'SICPAY42',
 'SICPAY53',
 'PAYDR31',
 'PAYDR42',
 'PAYDR53',
 'RETPLN31',
 'RETPLN42',
 'RETPLN53',
 'BSNTY31',
 'BSNTY42',
 'BSNTY53',
 'JOBORG31',
 'JOBORG42',
 'JOBORG53',
 'HELD31X',
 'HELD42X',
 'HELD53X',
 'OFFER31X',
 'OFFER42X',
 'OFFER53X',
 'OFREMP31',
 'OFREMP42',
 'OFREMP53',
 'YNOINS31',
 'YNOINS42',
 'YNOINS53',
 'SSIDIS12',
 'AFDC12',
 'FILEDR12',
 'WILFIL12',
 'FLSTAT12',
 'FILER12',
 'JTINRU12',
 'JNTPID12',
 'CLMDEP12',
 'DEPDNT12',
 'DPINRU12',
 'DPOTSD12',
 'TAXFRM12',
 'DEDUCT12',
 'TOTDED12',
 'CLMHIP12',
 'EICRDT12',
 'FOODST12',
 'FOODMN12',
 'FOODVL12',
 'TTLP12X',
 'FAMINC12',
 'POVCAT12',
 'POVLEV12',
 'WAGEP12X',
 'WAGIMP12',
 'BUSNP12X',
 'BUSIMP12',
 'UNEMP12X',
 'UNEIMP12',
 'WCMPP12X',
 'WCPIMP12',
 'INTRP12X',
 'INTIMP12',
 'DIVDP12X',
 'DIVIMP12',
 'SALEP12X',
 'SALIMP12',
 'PENSP12X',
 'PENIMP12',
 'SSECP12X',
 'SSCIMP12',
 'TRSTP12X',
 'TRTIMP12',
 'VETSP12X',
 'VETIMP12',
 'IRASP12X',
 'IRAIMP12',
 'REFDP12X',
 'REFIMP12',
 'ALIMP12X',
 'ALIIMP12',
 'CHLDP12X',
 'CHLIMP12',
 'CASHP12X',
 'CSHIMP12',
 'SSIP12X',
 'SSIIMP12',
 'PUBP12X',
 'PUBIMP12',
 'OTHRP12X',
 'OTHIMP12',
 'HIEUIDX',
 'TRIJA12X',
 'TRIFE12X',
 'TRIMA12X',
 'TRIAP12X',
 'TRIMY12X',
 'TRIJU12X',
 'TRIJL12X',
 'TRIAU12X',
 'TRISE12X',
 'TRIOC12X',
 'TRINO12X',
 'TRIDE12X',
 'MCRJA12',
 'MCRFE12',
 'MCRMA12',
 'MCRAP12',
 'MCRMY12',
 'MCRJU12',
 'MCRJL12',
 'MCRAU12',
 'MCRSE12',
 'MCROC12',
 'MCRNO12',
 'MCRDE12',
 'MCRJA12X',
 'MCRFE12X',
 'MCRMA12X',
 'MCRAP12X',
 'MCRMY12X',
 'MCRJU12X',
 'MCRJL12X',
 'MCRAU12X',
 'MCRSE12X',
 'MCROC12X',
 'MCRNO12X',
 'MCRDE12X',
 'MCDJA12',
 'MCDFE12',
 'MCDMA12',
 'MCDAP12',
 'MCDMY12',
 'MCDJU12',
 'MCDJL12',
 'MCDAU12',
 'MCDSE12',
 'MCDOC12',
 'MCDNO12',
 'MCDDE12',
 'MCDJA12X',
 'MCDFE12X',
 'MCDMA12X',
 'MCDAP12X',
 'MCDMY12X',
 'MCDJU12X',
 'MCDJL12X',
 'MCDAU12X',
 'MCDSE12X',
 'MCDOC12X',
 'MCDNO12X',
 'MCDDE12X',
 'OPAJA12',
 'OPAFE12',
 'OPAMA12',
 'OPAAP12',
 'OPAMY12',
 'OPAJU12',
 'OPAJL12',
 'OPAAU12',
 'OPASE12',
 'OPAOC12',
 'OPANO12',
 'OPADE12',
 'OPBJA12',
 'OPBFE12',
 'OPBMA12',
 'OPBAP12',
 'OPBMY12',
 'OPBJU12',
 'OPBJL12',
 'OPBAU12',
 'OPBSE12',
 'OPBOC12',
 'OPBNO12',
 'OPBDE12',
 'STAJA12',
 'STAFE12',
 'STAMA12',
 'STAAP12',
 'STAMY12',
 'STAJU12',
 'STAJL12',
 'STAAU12',
 'STASE12',
 'STAOC12',
 'STANO12',
 'STADE12',
 'PUBJA12X',
 'PUBFE12X',
 'PUBMA12X',
 'PUBAP12X',
 'PUBMY12X',
 'PUBJU12X',
 'PUBJL12X',
 'PUBAU12X',
 'PUBSE12X',
 'PUBOC12X',
 'PUBNO12X',
 'PUBDE12X',
 'PEGJA12',
 'PEGFE12',
 'PEGMA12',
 'PEGAP12',
 'PEGMY12',
 'PEGJU12',
 'PEGJL12',
 'PEGAU12',
 'PEGSE12',
 'PEGOC12',
 'PEGNO12',
 'PEGDE12',
 'PDKJA12',
 'PDKFE12',
 'PDKMA12',
 'PDKAP12',
 'PDKMY12',
 'PDKJU12',
 'PDKJL12',
 'PDKAU12',
 'PDKSE12',
 'PDKOC12',
 'PDKNO12',
 'PDKDE12',
 'PNGJA12',
 'PNGFE12',
 'PNGMA12',
 'PNGAP12',
 'PNGMY12',
 'PNGJU12',
 'PNGJL12',
 'PNGAU12',
 'PNGSE12',
 'PNGOC12',
 'PNGNO12',
 'PNGDE12',
 'POGJA12',
 'POGFE12',
 'POGMA12',
 'POGAP12',
 'POGMY12',
 'POGJU12',
 'POGJL12',
 'POGAU12',
 'POGSE12',
 'POGOC12',
 'POGNO12',
 'POGDE12',
 'PRSJA12',
 'PRSFE12',
 'PRSMA12',
 'PRSAP12',
 'PRSMY12',
 'PRSJU12',
 'PRSJL12',
 'PRSAU12',
 'PRSSE12',
 'PRSOC12',
 'PRSNO12',
 'PRSDE12',
 'POUJA12',
 'POUFE12',
 'POUMA12',
 'POUAP12',
 'POUMY12',
 'POUJU12',
 'POUJL12',
 'POUAU12',
 'POUSE12',
 'POUOC12',
 'POUNO12',
 'POUDE12',
 'PRIJA12',
 'PRIFE12',
 'PRIMA12',
 'PRIAP12',
 'PRIMY12',
 'PRIJU12',
 'PRIJL12',
 'PRIAU12',
 'PRISE12',
 'PRIOC12',
 'PRINO12',
 'PRIDE12',
 'HPEJA12',
 'HPEFE12',
 'HPEMA12',
 'HPEAP12',
 'HPEMY12',
 'HPEJU12',
 'HPEJL12',
 'HPEAU12',
 'HPESE12',
 'HPEOC12',
 'HPENO12',
 'HPEDE12',
 'HPDJA12',
 'HPDFE12',
 'HPDMA12',
 'HPDAP12',
 'HPDMY12',
 'HPDJU12',
 'HPDJL12',
 'HPDAU12',
 'HPDSE12',
 'HPDOC12',
 'HPDNO12',
 'HPDDE12',
 'HPNJA12',
 'HPNFE12',
 'HPNMA12',
 'HPNAP12',
 'HPNMY12',
 'HPNJU12',
 'HPNJL12',
 'HPNAU12',
 'HPNSE12',
 'HPNOC12',
 'HPNNO12',
 'HPNDE12',
 'HPOJA12',
 'HPOFE12',
 'HPOMA12',
 'HPOAP12',
 'HPOMY12',
 'HPOJU12',
 'HPOJL12',
 'HPOAU12',
 'HPOSE12',
 'HPOOC12',
 'HPONO12',
 'HPODE12',
 'HPSJA12',
 'HPSFE12',
 'HPSMA12',
 'HPSAP12',
 'HPSMY12',
 'HPSJU12',
 ...]

In [4]:
#This checks how many (rows, columns).
np.shape(MEPS)


Out[4]:
(5175, 1883)

In [21]:
#Take a brief look at which age groups we are looking at. A histogram.
import seaborn as sns
sns.set(style="darkgrid")

year_of_birth = MEPS.DOBYY
age = 2015 - year_of_birth
sns.distplot(age, kde=False)
plt.xlabel('age')
plt.ylabel('observations')
plt.title('Age Groups')


Out[21]:
<matplotlib.text.Text at 0x10c963b38>

In [16]:
age.describe()


Out[16]:
count    5175.000000
mean       36.382802
std        21.823397
min         3.000000
25%        18.000000
50%        34.000000
75%        53.000000
max        88.000000
Name: DOBYY, dtype: float64

In [17]:
#Now let's look at the medical expenditures column.
t = MEPS.TOTEXP12

In [18]:
total_exp = MEPS.TOTEXP12.order()
t = total_exp.reset_index().TOTEXP12

In [158]:
#We would like to see a cumulative distribution of health care spending.
cdf = np.cumsum(t) / 15221120
index1 = cdf.reset_index().index / 5175
cdf = cdf.reset_index()
cdf['x'] = index1
cdf.plot(x='x', y='TOTEXP12')
#y = cdf.index / 5175

#cdf.plot(x='y')


Out[158]:
<matplotlib.axes._subplots.AxesSubplot at 0x11873ce48>

In [113]:
#Now we would like to weave the two together - age and medical expenses.
age_exp = MEPS.groupby(['DOBYY', 'TOTEXP12']).size()
plt.scatter(x=(2012 - MEPS.DOBYY), y=MEPS.TOTEXP12)
plt.xlabel('Age')
plt.ylabel('2012 Total Medical Expenditure')
plt.title('Age vs. Medical Expenditure')
plt.axis('tight')

#Run a regression? Ask Prof Backus about this.


Out[113]:
(-4.2566120289657263,
 89.256612028965748,
 -13658.959918043447,
 286837.9599180434)

In [30]:
age = 2012 - MEPS.DOBYY
exp = MEPS.TOTEXP12

sns.jointplot(x=age, y=exp)


Out[30]:
<seaborn.axisgrid.JointGrid at 0x10e0bbb00>

In [34]:
#Try running a regression?
sns.regplot(x=age, y=exp)


Out[34]:
<matplotlib.axes._subplots.AxesSubplot at 0x10e7df668>

In [36]:
#How about gender? Quick look at gender distribution. There are more female observations.
MEPS.SEX.value_counts().plot(kind='bar')


Out[36]:
<matplotlib.axes._subplots.AxesSubplot at 0x1122e9898>

In [63]:
#Gender and Expenditures
sns.stripplot(x=MEPS.SEX, y=MEPS.TOTEXP12)


Out[63]:
<matplotlib.axes._subplots.AxesSubplot at 0x1125eeeb8>

In [126]:
def bottom_percent(list, percent):
    total = 0
    for x in list:
        if x < np.percentile(list, percent):
            total = total + x
    return total

def top_percent(list, percent):
    total = 0
    for x in list:
        if x > np.percentile(list, percent):
            total = total + x
    return total

In [194]:
#Mean spending of portions of sample
def bottom_mean_spending (list, percent):
    return np.mean(list[list <= np.percentile(list, percent)])

def top_mean_spending (list, percent):
    return np.mean(list[list >= np.percentile(list, percent)])

Means = (bottom_mean_spending (exp, 50),
         top_mean_spending (exp, 50),
         top_mean_spending (exp, 70),
         top_mean_spending (exp, 90),
         top_mean_spending (exp, 95),
         top_mean_spending (exp, 99),
         top_mean_spending (exp, 99.9))

bins = ("Bottom 50%", "Top 50%", "Top 30%", 
        "Top 10%", "Top 5%", "Top 1%", "Top 0.1%")

sns.barplot(bins, Means)
plt.title('Mean Medical Expenditure 2012')
plt.xlabel('Percent of Population, Ordered')
plt.ylabel('Mean Annual Expenditure per Person')


Out[194]:
<matplotlib.text.Text at 0x1195d6a20>

In [ ]:


In [ ]:


In [46]:
np.percentile(exp, 50)


Out[46]:
382.0

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [35]:
##THIS IS JUST A SAMPLE CODE FROM THE WEB.
import numpy as np
import matplotlib.pyplot as plt


N = 5
menMeans   = (20, 35, 30, 35, 27)
womenMeans = (25, 32, 34, 20, 25)
menStd     = (2, 3, 4, 1, 2)
womenStd   = (3, 5, 2, 3, 3)
ind = np.arange(N)    # the x locations for the groups
width = 0.35       # the width of the bars: can also be len(x) sequence

p1 = plt.bar(ind, menMeans,   width, color='r', yerr=womenStd)
p2 = plt.bar(ind, womenMeans, width, color='y',
             bottom=menMeans, yerr=menStd)

plt.ylabel('Scores')
plt.title('Scores by group and gender')
plt.xticks(ind+width/2., ('G1', 'G2', 'G3', 'G4', 'G5') )
plt.yticks(np.arange(0,81,10))
plt.legend( (p1[0], p2[0]), ('Men', 'Women') )

plt.show()



In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]: