In [1]:
import pandas as pd
import sys
import os, os.path

sys.path.append('/home/will/PatientPicker/')

In [2]:
import LoadingTools

In [3]:
redcap_data = LoadingTools.load_redcap_data()

In [4]:
test_cols = [col for col in redcap_data.columns if col.startswith('Test-')]


tmp = redcap_data[['Patient ID', 'Event Name']+test_cols].groupby('Patient ID').agg('count')
mask = (tmp[test_cols] == 0).all(axis = 1)

print mask.sum()
print tmp[mask]['Event Name'].sum()
print tmp[mask]['Event Name'].max()


37
48
3

In [5]:
old_data = pd.read_csv('/home/will/HIVSystemsBio/NewCytokineAnalysis/CytoRawData.csv', sep='\t')

In [6]:
old_pats = set(old_data['Patient ID'].unique())
already_done = redcap_data['Patient ID'].map(lambda x: x in old_pats)
nredcap_data = redcap_data[~already_done]
nredcap_data['IsMale'] = nredcap_data['Gender'] == 'Male'
nredcap_data['IsFemale'] = nredcap_data['Gender'] == 'Female'
print nredcap_data


<class 'pandas.core.frame.DataFrame'>
Int64Index: 931 entries, 0 to 1418
Columns: 432 entries, Patient ID to IsFemale
dtypes: bool(174), float64(149), object(109)

In [7]:
def safe_sum(inser):
    return inser.sum()

def safe_count(inser):
    return len(inser.dropna())

def safe_first(inser):
    return inser.first()

admit_cols = [col for col in nredcap_data.columns if col.startswith('Admit-')]
test_cols = [col for col in nredcap_data.columns if col.startswith('Test-')]
haart_cols = [col for col in nredcap_data.columns if col.startswith('HAART-')]
race_cols = [col for col in nredcap_data.columns if col.startswith('Race-')]
other_cols = ['Hepatitis C status (HCV)', 'IsMale', 'IsFemale', 'Age']
agg_dict = {'Event Name':safe_count,
            'TMHDS':safe_count,
            }
for col in admit_cols+test_cols+haart_cols+race_cols+other_cols:
    agg_dict[col] = safe_sum
    
agg_dict['Age'] = 'first'

wcols = ['Event Name', 'TMHDS']+haart_cols+test_cols+admit_cols+race_cols+other_cols
rename_dict = {'Event Name':'NumVisits', 'TMHDS':'Num-TMHDS'}
pat_data = nredcap_data.groupby('Patient ID').agg(agg_dict)[wcols].rename(columns=rename_dict)
for col in pat_data.columns:
    pat_data[col] = pat_data[col].map(float)

In [8]:
num_visit = pat_data['NumVisits'] >= 3
num_tmhds = pat_data['Num-TMHDS'] >= 3
num_race = pat_data['Race-Black'] > 0
canab = pat_data['Test-Cannabinoid'] > 0
odrug_cols = [col for col in test_cols if 'Cannabinoid' not in col]
odrugs = pat_data[odrug_cols].sum(axis=1)==0
wanted_pats = pat_data[num_visit&num_tmhds&canab&odrugs&num_race]

In [54]:
mask = nredcap_data['Test-Cannabinoid'] == True
first_canab = nredcap_data[mask][['Patient ID', 'VisitNum']]
canab_samples = pd.merge(wanted_pats, first_canab,
                         left_index=True,
                         right_on='Patient ID',
                         how='inner')[['Patient ID', 'VisitNum']]
canab_samples['Experiment'] = 'Canabinoid'
canab_samples['Priority'] = 0

In [55]:
benj_x4 = [('A0017', 'R02', 'BenjX4'),
           ('A0107', 'R05', 'BenjX4'),
           ('A0208', 'R00', 'BenjX4'),
           ('A0403', 'R01', 'BenjX4')]
benj_samples = pd.DataFrame(benj_x4, columns=['Patient ID', 'VisitNum', 'Experiment']).set_index('Patient ID')
benj_samples['Priority'] = 1

In [57]:
greg_x4 = [('A0004', 'R07', 'GregX4'),
           ('A0367', 'R05', 'GregX4'),
           ('A0023', 'R01', 'GregX4')]
#skipped A0208-R02 and A0017-R01 because they are long-visit
greg_samples = pd.DataFrame(greg_x4, columns=['Patient ID', 'VisitNum', 'Experiment']).set_index('Patient ID')
greg_samples['Priority'] = 2

In [58]:
num_visit = pat_data['NumVisits'] >= 3
num_tmhds = pat_data['Num-TMHDS'] >= 3
num_race = pat_data['Race-Black'] > 0
on_haart = pat_data['HAART-On']==pat_data['NumVisits']
old_people = pat_data['Age']>=50
old_pats = pat_data[num_visit&num_tmhds&num_race&old_people&on_haart]

mask = nredcap_data['TMHDS'].notnull()
first_tmhds = nredcap_data[mask].groupby('Patient ID')[['VisitNum']].first()
old_samples = pd.merge(old_pats, first_tmhds,
                       right_index=True,
                       left_index=True,
                       how='inner')[['VisitNum', 'NumVisits']]
old_samples['Experiment'] = 'Aging'
old_samples['Priority'] = '3'
old_samples = old_samples.sort('NumVisits', ascending=False)[['VisitNum', 'Experiment', 'Priority']]
old_samples


Out[58]:
VisitNum Experiment Priority
Patient ID
A0046 R02 Aging 3
A0139 R00 Aging 3
A0004 R02 Aging 3
A0001 R02 Aging 3
A0164 R00 Aging 3
A0326 R00 Aging 3
A0259 R00 Aging 3
A0127 R01 Aging 3
A0048 R02 Aging 3
A0165 R00 Aging 3
A0111 R01 Aging 3
A0378 R00 Aging 3
A0421 R00 Aging 3
A0252 R00 Aging 3
A0070 R01 Aging 3
A0072 R01 Aging 3
A0444 R00 Aging 3
A0428 R00 Aging 3
A0446 R00 Aging 3
A0143 R00 Aging 3
A0152 R00 Aging 3
A0213 R00 Aging 3
A0225 R00 Aging 3
A0310 R00 Aging 3
A0377 R00 Aging 3
A0396 R00 Aging 3
A0172 R00 Aging 3

In [62]:
all_samples = pd.concat([canab_samples,
                         benj_samples.reset_index(),
                         greg_samples.reset_index(),
                         old_samples.reset_index(),
                         ], axis=0, ignore_index=True)
out_pats = all_samples.sort(['Priority', 'Patient ID'])
out_pats.drop('Priority', axis=1).to_excel('/home/will/RedcapQC/NewCytokineAnalysis_allvisits.xlsx', index=False)

In [61]:
out_pats


Out[61]:
Patient ID VisitNum Experiment Priority
0 A0014 R00 Canabinoid 0
1 A0037 R00 Canabinoid 0
2 A0041 R02 Canabinoid 0
3 A0052 R01 Canabinoid 0
4 A0071 R00 Canabinoid 0
5 A0071 R01 Canabinoid 0
6 A0071 R02 Canabinoid 0
7 A0110 R02 Canabinoid 0
8 A0128 R00 Canabinoid 0
9 A0128 R01 Canabinoid 0
10 A0278 R00 Canabinoid 0
11 A0281 R00 Canabinoid 0
12 A0365 R00 Canabinoid 0
13 A0017 R02 BenjX4 1
14 A0107 R05 BenjX4 1
15 A0208 R00 BenjX4 1
16 A0403 R01 BenjX4 1
17 A0004 R07 GregX4 2
19 A0023 R01 GregX4 2
18 A0367 R05 GregX4 2
23 A0001 R02 Aging 3
22 A0004 R02 Aging 3
20 A0046 R02 Aging 3
28 A0048 R02 Aging 3
34 A0070 R01 Aging 3
35 A0072 R01 Aging 3
30 A0111 R01 Aging 3
27 A0127 R01 Aging 3
21 A0139 R00 Aging 3
39 A0143 R00 Aging 3
40 A0152 R00 Aging 3
24 A0164 R00 Aging 3
29 A0165 R00 Aging 3
46 A0172 R00 Aging 3
41 A0213 R00 Aging 3
42 A0225 R00 Aging 3
33 A0252 R00 Aging 3
26 A0259 R00 Aging 3
43 A0310 R00 Aging 3
25 A0326 R00 Aging 3
44 A0377 R00 Aging 3
31 A0378 R00 Aging 3
45 A0396 R00 Aging 3
32 A0421 R00 Aging 3
37 A0428 R00 Aging 3
36 A0444 R00 Aging 3
38 A0446 R00 Aging 3

In [ ]: