In [1]:
import csv
# Read both tab-separated mapping tables fully into memory as lists of rows.
# Row 0 of each table is used by later cells as its header.
with open('hmp_humann/humann-mappings_unique.tsv', 'rU') as humann_map_f:
    humann_map = list(csv.reader(humann_map_f, delimiter='\t'))

with open('hmp_mappings/unified_map.tsv', 'rU') as map_16s_f:
    map_16s = list(csv.reader(map_16s_f, delimiter='\t'))
In [2]:
# Show the first row of the HUMAnN mapping table (its header row).
print(humann_map[0])
In [3]:
# Show the first row of the 16S mapping table (its header row).
print(map_16s[0])
In [4]:
# Seed the matched table with the 16S header row so the output keeps its
# column names; matching data rows are appended after it.
map_16s_matched = [map_16s[0]]
In [5]:
# Column positions in the HUMAnN mapping table, looked up by name in its
# header row. list.index raises ValueError if a column name is absent.
humann_header = humann_map[0]
RANDSID = humann_header.index('RANDSID')
STSite = humann_header.index('STSite')
Parent_Specimen = humann_header.index('Parent_Specimen')
SRS = humann_header.index('SRS')
In [6]:
# Column positions in the 16S mapping table, looked up by name in its header
# row. list.index raises ValueError if a column name is absent.
header_16s = map_16s[0]
RSID = header_16s.index('RSID')
HMPBodySubsiteHMPBodySite = header_16s.index('HMPBodySubsiteHMPBodySite')
PSN = header_16s.index('PSN')
NAP = header_16s.index('NAP')
In [7]:
# First pass: for every HUMAnN data row (header skipped), collect each 16S
# row whose RSID and HMPBodySubsiteHMPBodySite columns equal that HUMAnN
# row's RANDSID and STSite columns. Matches are appended in 16S-table order,
# and a 16S row is appended once per HUMAnN row it matches.
for row in humann_map[1:]:
    this_RANDSID, this_STSite = row[RANDSID], row[STSite]
    this_set = []  # assigned but never read in this notebook
    map_16s_matched.extend(
        map_row
        for map_row in map_16s
        if map_row[RSID] == this_RANDSID
        and map_row[HMPBodySubsiteHMPBodySite] == this_STSite
    )
In [8]:
# Size of the matched table after the first pass; the count includes the
# header row stored at index 0.
len(map_16s_matched)
Out[8]:
In [9]:
# The matched table probably contains duplicate rows at this point. They are
# written out as-is and are to be removed later from the terminal.
with open('hmp_mappings/humann_full_map_raw.tsv','wb') as out_f:
    csv.writer(out_f, delimiter='\t').writerows(map_16s_matched)
In [10]:
# Start over: the first pass left out the other match options, so reset the
# matched table to just the 16S header row before matching again.
header_16s = map_16s[0]
map_16s_matched = [header_16s]
In [11]:
# Second pass: rebuild the matches using every available key. For each HUMAnN
# data row (header skipped), a 16S row is appended when any of these holds:
#   1. RSID == RANDSID and HMPBodySubsiteHMPBodySite == STSite
#   2. PSN == Parent_Specimen
#   3. NAP == SRS
# A 16S row is appended at most once per HUMAnN row, but it can be appended
# again for a different HUMAnN row, so the result may contain duplicates.
for row in humann_map[1:]:
    this_RANDSID = row[RANDSID]
    this_STSite = row[STSite]
    this_Parent_Specimen = row[Parent_Specimen]
    this_SRS = row[SRS]
    for map_row in map_16s:
        # PSN and NAP are column positions in the 16S table, so they must
        # index map_row (the 16S row). Indexing the HUMAnN row with them
        # compared that row against itself and ignored the 16S row entirely.
        if (
            (map_row[RSID] == this_RANDSID
             and map_row[HMPBodySubsiteHMPBodySite] == this_STSite)
            or map_row[PSN] == this_Parent_Specimen
            or map_row[NAP] == this_SRS
        ):
            map_16s_matched.append(map_row)
In [12]:
# Size of the matched table after the second pass; the count includes the
# header row stored at index 0.
len(map_16s_matched)
Out[12]:
In [13]:
# Size of the full 16S mapping table (header row included), for comparison
# with the matched count.
len(map_16s)
Out[13]:
In [14]:
# The matched table likely contains duplicate rows. They are written out
# as-is and are to be removed afterwards with bash.
with open('hmp_mappings/humann_full_map_raw.tsv','wb') as out_f:
    csv.writer(out_f, delimiter='\t').writerows(map_16s_matched)
In [15]:
# Add a new leading column to every row of the matched table (header row
# included): the text of the current first column up to, but not including,
# its first '.V', or the whole value when '.V' does not occur.
# NOTE: this prepends unconditionally, so running the cell again adds another
# column each time.
for idx, full_row in enumerate(map_16s_matched):
    id_prefix = full_row[0].partition('.V')[0]
    map_16s_matched[idx] = [id_prefix] + full_row
In [16]:
# Overwrite the raw map once more, now with the extra leading column added.
with open('hmp_mappings/humann_full_map_raw.tsv','wb') as out_f:
    csv.writer(out_f, delimiter='\t').writerows(map_16s_matched)
In [ ]: