In [1]:
import pandas as pd

In [2]:
# Load the 16S taxa table: tab-separated, with the first column used as the row index.
df_16s = pd.read_csv(
    "data/taxatable.burst.16S.txt",
    index_col=0,
    sep="\t",
)

In [17]:
# Load the HMP mapping table (tab-separated), indexed by its "SRS" column.
df_hmp_map = pd.read_csv(
    "data/HMP_map.txt",
    index_col="SRS",
    sep="\t",
)

In [18]:
# Column labels of the 16S table; these are the labels used for the .loc lookups in later cells.
df_16s.columns


Out[18]:
Index(['SRS011271', 'SRS011405', 'SRS011452', 'SRS011529', 'SRS011584',
       'SRS011586', 'SRS013234', 'SRS013252', 'SRS013258', 'SRS013502',
       ...
       'SRS063603', 'SRS063985', 'SRS064276', 'SRS064493', 'SRS064557',
       'SRS064809', 'SRS065179', 'SRS065278', 'SRS065310', 'SRS065347'],
      dtype='object', length=180)

In [19]:
# Shape of the HMP-map rows selected by the 16S column labels.
# The recorded result has more rows (269) than labels (180), so some SRS
# labels appear more than once in the map index.
df_hmp_map.loc[df_16s.columns].shape


Out[19]:
(269, 77)

In [20]:
# Load the shotgun file listing (tab-separated), indexed by its "srs" column.
df_shotgun_map = pd.read_csv(
    "data/hmp_shotgun_files.txt",
    index_col="srs",
    sep="\t",
)

In [31]:
# Select the shotgun-map rows whose index matches the 16S column labels and
# write them out as a tab-separated file.
(
    df_shotgun_map
    .loc[df_16s.columns]
    .to_csv("data/hmp_shotgun_paired_samples.txt", sep="\t")
)

In [32]:
# Keep only the HMP-map rows whose TARGET_SUBFRAGMENT is 'v35', select the rows
# matching the 16S column labels, and write them out as a tab-separated file.
(
    df_hmp_map
    .loc[lambda frame: frame["TARGET_SUBFRAGMENT"] == "v35"]
    .loc[df_16s.columns]
    .to_csv("data/hmp_16S_paired_samples.txt", sep="\t")
)

In [ ]: