IMPORT REDCAP DATA AND THE COLUMNS REGARDING CURRENT AND PAST ART TREATMENT


In [1]:
from pandas import HDFStore #This is a file storage format for large collections of data
store = HDFStore('/home/will/HIVReportGen/Data/BaseRedcap/HIVAIDSGeneticAnalys_DATA_LABELS_2013-01-16_1211.hdf')
redcap_data = store['redcap']
store.close()

In [2]:
print redcap_data
print redcap_data.columns


<class 'pandas.core.frame.DataFrame'>
Int64Index: 1419 entries, 0 to 1418
Columns: 425 entries, Patient ID to Predicted-R5
dtypes: bool(171), float64(115), object(139)
array([Patient ID, Event Name, Patient visit number, Date of visit,
       Year of Birth, Age, Gender, Transgender designation, Ethnicity,
       Race (choice='Asian'),
       Race (choice='American Indian/Alaska Native'),
       Race (choice='Black or African American'),
       Race (choice='Native Hawaiian or other Pacific Islander'),
       Race (choice='White'), Race (choice='More than one race'),
       Race (choice='Unknown'), Height, Weight, Abdominal girth,
       Highest education level completed,
       Exposure Category (choice='Men who have sex with men (MSM)'),
       Exposure Category (choice='Injection drug use (IDU)'),
       Exposure Category (choice='Blood transfusion'),
       Exposure Category (choice='Heterosexual'),
       Exposure Category (choice='Hemophilia'),
       Exposure Category (choice='Perinatal'),
       Exposure Category (choice='MSM and IDU'),
       Exposure Category (choice='Heterosexual and IDU'),
       Exposure Category (choice='Other'), Exposure Category (choice='ND'),
       Explanation of 'other' selection for exposure category,
       Current Tobacco use, Date stopped tobacco use,
       Type of tobacco used (choice='Cigarettes'),
       Type of tobacco used (choice='Cigars'),
       Type of tobacco used (choice='Pipes'),
       Type of tobacco used (choice='Chewing tobacco'),
       Number of uses per day, Number of years used, Current Alcohol use,
       Date stopped alcohol use, Number of beers consumed per week,
       Number of glasses of wine consumed per week,
       Number of liquor based drinks consumed per week, Current Drug use,
       Date Stopped drug use, Age first used drug, Date last used drugs,
       Drug Use and HIV Status, Drugs used (choice='Marijuana'),
       Drugs used (choice='Cocaine (crack, nasal, smoke, inject)'),
       Drugs used (choice='Heroin (nasal, inject)'),
       Drugs used (choice='Methamphetamine (smoke, nasal, inject)'),
       Drugs used (choice='Benzodiazapine (i.e. valium, ativan, xanax, klonipin, etc)'),
       Drugs used (choice='Narcotics'), Drugs used (choice='Ecstasy'),
       Drugs used (choice='PCP'), Drugs used (choice='Ritalin'),
       Drugs used (choice='Other'), Drugs used (choice='none'),
       Designation of 'other' drug use,
       Specimen Collection (choice='urine'),
       Specimen Collection (choice='hair follicles'),
       Specimen Collection (choice='blood only'), Amphetamines,
       Barbiturates, Benzodiazepines, Cannabinoid, Cocaine + metabolite,
       Opiates, Phencyclidine, User designation, Pregnant,
       Estimated due date, HIV seropositive date, Years seropositive,
       How obtained HIV seropositive date (choice='verbal'),
       How obtained HIV seropositive date (choice='medical records'),
       Likely location of infection, DSG lowest, DSG today,
       Initial CD4 count (cells/uL), Date of initial CD4 count,
       Nadir CD4 count (cells/uL), Date of nadir CD4 count,
       Latest CD4 count (cells/uL), Date of latest CD4 count,
       Initial CD8 count (cells/uL), Date of initial CD8 count,
       Nadir CD8 count (cells/uL), Date of nadir CD8 count,
       Latest CD8 count (cells/uL), Date of latest CD8 count,
       Initial viral load (copies/mL), Date of initial viral load,
       Peak viral load (copies/mL), Date of peak viral load,
       Latest viral load, Date of latest viral load, Current ART status,
       Date first began HAART, Current ART (choice='AZT'),
       Current ART (choice='ABC'), Current ART (choice='DVL'),
       Current ART (choice='ATV'), Current ART (choice='T-20'),
       Current ART (choice='3TC'), Current ART (choice='TDF'),
       Current ART (choice='SAQ'), Current ART (choice='AMP'),
       Current ART (choice='FPV'), Current ART (choice='DDI'),
       Current ART (choice='FTC'), Current ART (choice='RTV'),
       Current ART (choice='LPV/r'), Current ART (choice='DDC'),
       Current ART (choice='EFV'), Current ART (choice='NFL'),
       Current ART (choice='TPV'), Current ART (choice='D4T'),
       Current ART (choice='NVP'), Current ART (choice='IDV'),
       Current ART (choice='DRV'), Current ART (choice='Combivir'),
       Current ART (choice='Trizivir'), Current ART (choice='Kaletra'),
       Current ART (choice='Epzicom'), Current ART (choice='Truvada'),
       Current ART (choice='Atripla'), Current ART (choice='Other'),
       Current ART (choice='none'), Current ART (choice='ND'),
       Define 'other' selection for current ART, Past ART (choice='AZT'),
       Past ART (choice='ABC'), Past ART (choice='DVL'),
       Past ART (choice='ATV'), Past ART (choice='T-20'),
       Past ART (choice='3TC'), Past ART (choice='TDF'),
       Past ART (choice='SAQ'), Past ART (choice='AMP'),
       Past ART (choice='FPV'), Past ART (choice='DDI'),
       Past ART (choice='FTC'), Past ART (choice='RTV'),
       Past ART (choice='LPV/r'), Past ART (choice='DDC'),
       Past ART (choice='EFV'), Past ART (choice='NFL'),
       Past ART (choice='TPV'), Past ART (choice='D4T'),
       Past ART (choice='NVP'), Past ART (choice='IDV'),
       Past ART (choice='DRV'), Past ART (choice='Combivir'),
       Past ART (choice='Trizivir'), Past ART (choice='Kaletra'),
       Past ART (choice='Epzicom'), Past ART (choice='Truvada'),
       Past ART (choice='Atripla'), Past ART (choice='Other'),
       Past ART (choice='none'), Past ART (choice='ND'),
       Define 'other' selection for past ART, List any drug resistance,
       Date of patients most recent CBC test, Red blood cell count date,
       Red blood cell count result, Hemoglobin date, Hemoglobin results,
       Hematocrit date, Hematocrit results, Red blood cell indices date,
       Red blood cells indices results, Mean corpuscular hemoglobin date,
       Mean corpuscular hemoglobin results,
       Mean corpuscular hemoglobin concentration date,
       Mean corpuscular hemoglobin concentration results,
       Red blood cell distribution date,
       Red blood cell distribution results, White blood cell count date,
       White blood cell count results, Differential count date,
       Differential count results, Platelet count date,
       Platelet count results, Creatinine date, Creatinine results,
       BUN date, BUN results, Bilirubin date, Bilirubin results,
       Hepatitis B status (HBV), Year diagnosed HBV positive,
       Hepatitis C status (HCV), Year diagnosed HCV positive,
       Cytomegalovirus (CMV), Year diagnosed CMV positive,
       Human Papillomavirus (HPV), Year diagnosed HPV positive,
       Herpes Simplex Virus Type 1 (HSV 1), Year diagnosed HSV 1 positive,
       Herpes Simplex Virus Type 2 (HSV 2), Year diagnosed HSV 2 positive,
       Tuberculosis, Year diagnosed tuberculosis positive, Hypertension,
       Year diagnosed with hypertension, Diabetes,
       Year diagnosed with diabetes, Elevated lipids,
       Year diagnosed with elevated lipids, Asthma,
       Year diagnosed with asthma,
       Chronic obstructive pulmonary disease (COPD),
       Year diagnosed with COPD,
       Current AIDS defining indicators (choice='Candidiasis of bronchi, trachea, or lungs'),
       Current AIDS defining indicators (choice='Candidiasis, esophageal'),
       Current AIDS defining indicators (choice='Cervical cancer, invasive'),
       Current AIDS defining indicators (choice='Coccidioidomycosis, disseminated'),
       Current AIDS defining indicators (choice='Cryptococcosis, extrapulmonary'),
       Current AIDS defining indicators (choice='Cryptosporidiosis, chronic intestinal (>1 month duration)'),
       Current AIDS defining indicators (choice='Cytomegalovirus disease- other than liver, spleen, or lymph nodes'),
       Current AIDS defining indicators (choice='Cytomegalovirus retinitis (with loss of vision)'),
       Current AIDS defining indicators (choice='Encephalopathy, HIV-related'),
       Current AIDS defining indicators (choice='Herpes simplex: chronic ulcer(s) (>1 mo. duration) or bronchitis, pneumonitis, or esophagitis'),
       Current AIDS defining indicators (choice='Histoplasmosis, disseminated'),
       Current AIDS defining indicators (choice='Isosporiasis, chronic intestinal (>1 month duration)'),
       Current AIDS defining indicators (choice='Kaposi's sarcoma'),
       Current AIDS defining indicators (choice='Lymphoma, Burkitt's'),
       Current AIDS defining indicators (choice='Lymphoma, immunoblastic'),
       Current AIDS defining indicators (choice='Lymphoma, primary, of brain (primary central nervous system lymphoma)'),
       Current AIDS defining indicators (choice='Mycobacterium avium complex or disease caused by M. Kansasii, disseminated'),
       Current AIDS defining indicators (choice='Disease caused by Mycobacterium tuberculosis, any site (pulmonary or extrapulmonary)'),
       Current AIDS defining indicators (choice='Disease caused by Mycobacterium, other species or unidentified species, disseminated'),
       Current AIDS defining indicators (choice='Pneumocystis carinii pneumonia'),
       Current AIDS defining indicators (choice='Pneumonia, recurrent'),
       Current AIDS defining indicators (choice='Progressive multifocal leukoencephalopathy'),
       Current AIDS defining indicators (choice='Salmonella septicemia, recurrent'),
       Current AIDS defining indicators (choice='Toxoplasmosis of brain (encephalitis)'),
       Current AIDS defining indicators (choice='Wasting syndrome caused by HIV infection'),
       Current AIDS defining indicators (choice='Other'),
       Current AIDS defining indicators (choice='none'),
       Date diagnosed: Candidiasis of bronchi, trachea, or lungs,
       Date diagnosed: Candidiasis, esophageal,
       Date diagnosed: Cervical Cancer, invasive,
       Date diagnosed: Coccidioidomycosis, disseminated,
       Date diagnosed: Cryptococcosis, extrapulmonary,
       Date diagnosed: Crypotosporidiosis, chronic intestinal (>1 month duration),
       Date diagnosed: Cytomegalovirus disease- other than liver, spleen, or lymph nodes,
       Date diagnosed: Cytomegalovirus retinitis (with loss of vision),
       Date diagnosed: Encephalopathy, HIV-related,
       Date diagnosed: Herpes simplex, chronic ulcer(s) (>1 mo. duration) or bronchitis, pneumonitis, or esophagitis,
       Date diagnosed: Histoplasmosis, disseminated,
       Date diagnosed: Isosporiasis, chronic intestinal (>1month duration),
       Date diagnosed: Kaposi's sarcoma,
       Date diagnosed: Lymphoma, Burkitt's,
       Date diagnosed: Lymphoma, immunoblastic,
       Date diagnosed: Lymphoma, primary, of brain (primary central nervous system lymphoma),
       Date diagnosed: Mycobacterium avium complex or disease caused by M. Kansasii, disseminated,
       Date diagnosed: Disease caused by Mycobacterium tuberculosis, any site (pulmonary or extrapulmonary),
       Date diagnosed: Disease caused by Mycobacterium, other species or unidentified species, disseminated,
       Date diagnosed: Pneumocystis carinii pneumonia,
       Date diagnosed: Pneumonia, recurrent,
       Date diagnosed: Progressive multifocal leukoencephalopathy,
       Date diagnosed: Salmonella septicemia, recurrent,
       Date diagnosed: Toxoplasmosis of brain (encephalitis),
       Date diagnosed: Wasting syndrome caused by HIV infection,
       Define 'other' designation for AIDS defining indicators.  Include date diagnosed.,
       Past AIDS defining indicators (choice='Candidiasis of bronchi, trachea, or lungs'),
       Past AIDS defining indicators (choice='Candidiasis, esophageal'),
       Past AIDS defining indicators (choice='Cervical cancer, invasive'),
       Past AIDS defining indicators (choice='Coccidioidomycosis, disseminated'),
       Past AIDS defining indicators (choice='Cryptococcosis, extrapulmonary'),
       Past AIDS defining indicators (choice='Cryptosporidiosis, chronic intestinal (>1 month duration)'),
       Past AIDS defining indicators (choice='Cytomegalovirus disease- other than liver, spleen, or lymph nodes'),
       Past AIDS defining indicators (choice='Cytomegalovirus retinitis (with loss of vision)'),
       Past AIDS defining indicators (choice='Encephalopathy, HIV-related'),
       Past AIDS defining indicators (choice='Herpes simplex: chronic ulcer(s) (>1 mo. duration) or bronchitis, pneumonitis, or esophagitis'),
       Past AIDS defining indicators (choice='Histoplasmosis, disseminated'),
       Past AIDS defining indicators (choice='Isosporiasis, chronic intestinal (>1 month duration)'),
       Past AIDS defining indicators (choice='Kaposi's sarcoma'),
       Past AIDS defining indicators (choice='Lymphoma, Burkitt's'),
       Past AIDS defining indicators (choice='Lymphoma, immunoblastic'),
       Past AIDS defining indicators (choice='Lymphoma, primary, of brain (primary central nervous system lymphoma)'),
       Past AIDS defining indicators (choice='Mycobacterium avium complex or disease caused by M. Kansasii, disseminated'),
       Past AIDS defining indicators (choice='Disease caused by Mycobacterium tuberculosis, any site (pulmonary or extrapulmonary)'),
       Past AIDS defining indicators (choice='Disease caused by Mycobacterium, other species or unidentified species, disseminated'),
       Past AIDS defining indicators (choice='Pneumocystis carinii pneumonia'),
       Past AIDS defining indicators (choice='Pneumonia, recurrent'),
       Past AIDS defining indicators (choice='Progressive multifocal leukoencephalopathy'),
       Past AIDS defining indicators (choice='Salmonella septicemia, recurrent'),
       Past AIDS defining indicators (choice='Toxoplasmosis of brain (encephalitis)'),
       Past AIDS defining indicators (choice='Wasting syndrome caused by HIV infection'),
       Past AIDS defining indicators (choice='Other'),
       Past AIDS defining indicators (choice='none'),
       Date resolved: Candidiasis of bronchi, trachea, or lungs,
       Date resolved: Candidiasis, esophageal,
       Date resolved: Cervical Cancer, invasive,
       Date resolved: Coccidioidomycosis, disseminated,
       Date resolved: Cryptococcosis, extrapulmonary,
       Date resolved: Crypotosporidiosis, chronic intestinal (>1 month duration),
       Date resolved: Cytomegalovirus disease- other than liver, spleen, or lymph nodes,
       Date resolved: Cytomegalovirus retinitis (with loss of vision),
       Date resolved: Encephalopathy, HIV-related,
       Date resolved: Herpes simplex, chronic ulcer(s) (>1 mo. duration) or bronchitis, pneumonitis, or esophagitis,
       Date resolved: Histoplasmosis, disseminated,
       Date resolved: Isosporiasis, chronic intestinal (>1month duration),
       Date resolved: Kaposi's sarcoma, Date resolved: Lymphoma, Burkitt's,
       Date resolved: Lymphoma, immunoblastic,
       Date resolved: Lymphoma, primary, of brain (primary central nervous system lymphoma),
       Date resolved: Mycobacterium avium complex or disease caused by M. Kansasii, disseminated,
       Date resolved: Disease caused by Mycobacterium tuberculosis, any site (pulmonary or extrapulmonary),
       Date resolved: Disease caused by Mycobacterium, other species or unidentified species, disseminated,
       Date resolved: Pneumocystis carinii pneumonia,
       Date resolved: Pneumonia, recurrent,
       Date resolved: Progressive multifocal leukoencephalopathy,
       Date diagnosed: Salmonella septicemia, recurrent.1,
       Date resolved: Toxoplasmosis of brain (encephalitis),
       Date resolved: Wasting syndrome caused by HIV infection,
       Define 'other' designation for AIDS defining indicators. Include date resolved.,
       Neurocognitive test, MSK Score, Psychomotor Speed Score,
       Memory Recall Score, Constructional Score,
       Total Modified Hopkins Dementia Score,
       Mental health history (including inpatient stay for MH) / Hospitalizations for medical/surgical reasons),
       Other diagnoses or complicating conditions relating to HIV-1 infection including all sexually transmitted diseases,
       Other comments or observations,
       Mental Health Issues (choice='Depression'),
       Mental Health Issues (choice='Bipolar'),
       Mental Health Issues (choice='Schizophrenia'),
       Mental Health Issues (choice='ADHD'),
       Mental Health Issues (choice='Anxiety'),
       Mental Health Issues (choice='Dementia'),
       Mental Health Issues (choice='Epilepsy/Seizures'),
       Mental Health Issues (choice='Diagnosed HIVD'),
       Mental Health Issues (choice='Head trauma or spinal injury'),
       Mental Health Issues (choice='MS'),
       Mental Health Issues (choice='Paranoia'),
       Mental Health Issues (choice='PTSD'),
       Mental Health Issues (choice='Stroke'),
       Mental Health Issues (choice='Peripheral neuropathy'),
       Mental Health Issues (choice='Other neurological problems'),
       Mental Health Issues (choice='No neurological problems'),
       Clarification of 'other neurological problems' under mental health issues,
       Complete?, PBMC isolation complete, Date of PBMC isolation,
       Number of tubes of PBMCs collected, Serum collected,
       Number of tubes of serum collected, Genomic DNA isolated,
       Concentration of genomic DNA, Successful PCR product collection,
       Date of successful PCR product collection,
       Name of person who completed successful PCR,
       Lab notebook number of PCR collection,
       Lab notebook page number of PCR collection, PCR sequence obtained,
       Company who completed PCR sequencing,
       Date of successful PCR product sequencing,
       Sequence sent to Brian Moldover, Drop box folder title,
       Date trace file sent to Brian Moldover,
       Sequence passed quality control,
       Date sequence passed quality control,
       SNPs for trace file determined, Date SNPs were called,
       4.4kB fragment for this visit, Date of 4.4kB fragment,
       Name of person who successfully completed 4.4kB fragment,
       Lab notebook number of 4.4kB fragment,
       Lab notebook page number of 4.4kB fragment,
       Sequence of 4.4kB obtained (choice='whole fragment'),
       Sequence of 4.4kB obtained (choice='LTR'),
       Sequence of 4.4kB obtained (choice='envelope'),
       Sequence of 4.4kB obtained (choice='V3 only'),
       Sequence of 4.4kB obtained (choice='other'),
       Define 'other' for sequence of 4.4kB obtained ,
       Deep sequencing for this visit, Date of Deep sequencing,
       Deep sequencing completed by which facility,
       Lab notebook number for deep sequencing,
       Lab notebook page number for deep sequencing, RNA isolated,
       Date of RNA isolation, Name of person who isolated RNA,
       Lab notebook number for RNA isolation,
       Lab notebook page number for RNA isolation,
       Microarray/RNA expression profiling completed,
       Date microarray/RNA expression profiling completed,
       Name of person/facitlity that completed microarray/RNA expression profiling,
       Lab notebook number for microarray/RNA expression profiling,
       Lab notebook page number for microarray/RNA expression profiling,
       miRNA profiling completed, Date miRNA profiling completed,
       Name of person/facility that completed miRNA profiling,
       Lab notebook number for miRNA profiling,
       Lab notebook page number for miRNA profiling,
       Cytokine profiling completed, Date cytokine profiling completed,
       Name of person/facility that completed cytokine profiling,
       Lab notebook number for cytokine profiling,
       Lab notebook page number for cytokine profiling,
       Viral RNA from plasma isolated, Date of vRNA isolation,
       Name of person who isolated vRNA ,
       Lab notebook number for vRNA isolation,
       Lab notebook page number for vRNA isolation,
       Envelope in silico prediction completed,
       Date of envelope in silico prediction completion,
       Name of person who completed envelope in silico prediction,
       Lab notebook number of envelope in silico prediction,
       Lab notebook page number of envelope in silico prediction,
       Envelope functionality/tropism completed,
       Date of envelope functionality/tropism completion,
       Name of person who completed envelope functionality/tropism,
       Lab notebook number of envelope functionality/tropism,
       Lab notebook page number of envelope functionality/tropism,
       Complete?.1, Predicted-R5], dtype=object)

In [14]:
therapy_data = redcap_data[["Current ART (choice='AZT')","Current ART (choice='ABC')","Current ART (choice='DVL')","Current ART (choice='ATV')",
"Current ART (choice='T-20')","Current ART (choice='3TC')","Current ART (choice='TDF')","Current ART (choice='SAQ')","Current ART (choice='AMP')",
"Current ART (choice='FPV')","Current ART (choice='DDI')","Current ART (choice='FTC')", "Current ART (choice='RTV')","Current ART (choice='LPV/r')", 
"Current ART (choice='DDC')","Current ART (choice='EFV')","Current ART (choice='NFL')","Current ART (choice='TPV')","Current ART (choice='D4T')",
"Current ART (choice='NVP')","Current ART (choice='IDV')","Current ART (choice='DRV')","Current ART (choice='Combivir')","Current ART (choice='Trizivir')", 
"Current ART (choice='Kaletra')","Current ART (choice='Epzicom')","Current ART (choice='Truvada')","Current ART (choice='Atripla')", "Current ART (choice='Other')",
"Current ART (choice='none')","Current ART (choice='ND')","Past ART (choice='AZT')","Past ART (choice='ABC')","Past ART (choice='DVL')","Past ART (choice='ATV')", 
"Past ART (choice='T-20')","Past ART (choice='3TC')","Past ART (choice='TDF')","Past ART (choice='SAQ')","Past ART (choice='AMP')","Past ART (choice='FPV')", 
"Past ART (choice='DDI')","Past ART (choice='FTC')","Past ART (choice='RTV')","Past ART (choice='LPV/r')","Past ART (choice='DDC')","Past ART (choice='EFV')", 
"Past ART (choice='NFL')","Past ART (choice='TPV')","Past ART (choice='D4T')","Past ART (choice='NVP')","Past ART (choice='IDV')","Past ART (choice='DRV')", 
"Past ART (choice='Combivir')","Past ART (choice='Trizivir')","Past ART (choice='Kaletra')","Past ART (choice='Epzicom')","Past ART (choice='Truvada')",
"Past ART (choice='Atripla')","Past ART (choice='Other')","Past ART (choice='none')","Past ART (choice='ND')"]]
print therapy_data


<class 'pandas.core.frame.DataFrame'>
Int64Index: 1419 entries, 0 to 1418
Data columns:
Current ART (choice='AZT')         1419  non-null values
Current ART (choice='ABC')         1419  non-null values
Current ART (choice='DVL')         1419  non-null values
Current ART (choice='ATV')         1419  non-null values
Current ART (choice='T-20')        1419  non-null values
Current ART (choice='3TC')         1419  non-null values
Current ART (choice='TDF')         1419  non-null values
Current ART (choice='SAQ')         1419  non-null values
Current ART (choice='AMP')         1419  non-null values
Current ART (choice='FPV')         1419  non-null values
Current ART (choice='DDI')         1419  non-null values
Current ART (choice='FTC')         1419  non-null values
Current ART (choice='RTV')         1419  non-null values
Current ART (choice='LPV/r')       1419  non-null values
Current ART (choice='DDC')         1419  non-null values
Current ART (choice='EFV')         1419  non-null values
Current ART (choice='NFL')         1419  non-null values
Current ART (choice='TPV')         1419  non-null values
Current ART (choice='D4T')         1419  non-null values
Current ART (choice='NVP')         1419  non-null values
Current ART (choice='IDV')         1419  non-null values
Current ART (choice='DRV')         1419  non-null values
Current ART (choice='Combivir')    1419  non-null values
Current ART (choice='Trizivir')    1419  non-null values
Current ART (choice='Kaletra')     1419  non-null values
Current ART (choice='Epzicom')     1419  non-null values
Current ART (choice='Truvada')     1419  non-null values
Current ART (choice='Atripla')     1419  non-null values
Current ART (choice='Other')       1419  non-null values
Current ART (choice='none')        1419  non-null values
Current ART (choice='ND')          1419  non-null values
Past ART (choice='AZT')            1419  non-null values
Past ART (choice='ABC')            1419  non-null values
Past ART (choice='DVL')            1419  non-null values
Past ART (choice='ATV')            1419  non-null values
Past ART (choice='T-20')           1419  non-null values
Past ART (choice='3TC')            1419  non-null values
Past ART (choice='TDF')            1419  non-null values
Past ART (choice='SAQ')            1419  non-null values
Past ART (choice='AMP')            1419  non-null values
Past ART (choice='FPV')            1419  non-null values
Past ART (choice='DDI')            1419  non-null values
Past ART (choice='FTC')            1419  non-null values
Past ART (choice='RTV')            1419  non-null values
Past ART (choice='LPV/r')          1419  non-null values
Past ART (choice='DDC')            1419  non-null values
Past ART (choice='EFV')            1419  non-null values
Past ART (choice='NFL')            1419  non-null values
Past ART (choice='TPV')            1419  non-null values
Past ART (choice='D4T')            1419  non-null values
Past ART (choice='NVP')            1419  non-null values
Past ART (choice='IDV')            1419  non-null values
Past ART (choice='DRV')            1419  non-null values
Past ART (choice='Combivir')       1419  non-null values
Past ART (choice='Trizivir')       1419  non-null values
Past ART (choice='Kaletra')        1419  non-null values
Past ART (choice='Epzicom')        1419  non-null values
Past ART (choice='Truvada')        1419  non-null values
Past ART (choice='Atripla')        1419  non-null values
Past ART (choice='Other')          1419  non-null values
Past ART (choice='none')           1419  non-null values
Past ART (choice='ND')             1419  non-null values
dtypes: bool(62)

IMPORT INFORMATION THAT CONVERTS DRUGS TO DRUG CLASSES

First import a file (saved on Will's super computer) that can be used to convert the names of the row colums, and make a new DataFrame(?) which groups the names into drug classes

In [26]:
#therapy_data["Current ART (choice='T-20')"]

#redcap column headers are too long, want to rename them to refer to the drugs more explictly

new_headers = {
              


test_mask = therapy_data["Current ART (choice='ATV')"] == True
print test_mask


  File "<ipython-input-26-95c30c16cfd0>", line 7
    "Current ART (choice='ABC')":"ABC"
                                ^
SyntaxError: invalid syntax

In [ ]: