Add additional covariates to the mapping file, as discussed at the 08/21/2017 lab meeting (output: mapping_more_MrOS.txt).


In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

mapping file


In [2]:
# Load the cleaned mapping file (tab-separated), keeping every value as a
# string and using the '#SampleID' column as the row index.
mf = pd.read_csv(
    '../data_uparse/mapping_cleaned_MrOS.txt',
    sep='\t',
    dtype=str,
    index_col='#SampleID',
)

In [3]:
# Load the health covariate table and shape it to match the mapping file:
# the 'ID' column is renamed to '#SampleID' and used as the index, and
# QLCOMP is converted to a categorical column.
health = (
    pd.read_csv('../data/MrOS_healthvari.csv', sep=',', dtype=str)
    .rename(columns={'ID': '#SampleID'})
    .set_index('#SampleID')
    .astype({'QLCOMP': 'category'})
)

In [4]:
# Preview the first five rows of the health table (index and QLCOMP values).
health.head(n=5)


Out[4]:
QLCOMP
#SampleID
BI0023 1:GOOD/EXCELLENT
BI0056 1:GOOD/EXCELLENT
BI0131 1:GOOD/EXCELLENT
BI0153 1:GOOD/EXCELLENT
BI0215 1:GOOD/EXCELLENT

In [5]:
# Report the mapping file's (rows, columns) and preview its first five rows
# so the dimensions can be compared with the merged table later on.
print(mf.shape)
mf.head(n=5)


(599, 66)
Out[5]:
BarcodeSequence LinkerPrimerSequence Experiment_Design_Description Library_Construction_Protocol Linker Platform Center_Name Center_Project Instrument_Model Title ... OHV1D2 OHV1D2CT OHVD2CT OHVDTOT OHV1DTOT OHSEAS VDstatus Description ratio_activation ratio_catabolism
#SampleID
BI0023 TCTGGTGACATT GGACTACHVGGGTWTCTAAT 16S stool samples sequenced for MrOS Vitamin D... 16S rRNA v4 GT Illumina BI MrOS Illumina MiSeq MrOS_VitaminD ... 0.0 1: Yes 1: Yes 25.8 0.039299999999999995 3:SUMMER sufficiency Orwoll.BI0023.BI 0.0015232558139534882 0.0686046511627907
BI0056 CAAGCATGCCTA GGACTACHVGGGTWTCTAAT 16S stool samples sequenced for MrOS Vitamin D... 16S rRNA v4 GT Illumina BI MrOS Illumina MiSeq MrOS_VitaminD ... 0.0 1: Yes 1: Yes 39.2 0.0619 2:SPRING sufficiency Orwoll.BI0056.BI 0.001579081632653061 0.09974489795918368
BI0131 CTATTTGCGACA GGACTACHVGGGTWTCTAAT 16S stool samples sequenced for MrOS Vitamin D... 16S rRNA v4 GT Illumina BI MrOS Illumina MiSeq MrOS_VitaminD ... 0.0 1: Yes 1: Yes 23.1 0.0521 2:SPRING sufficiency Orwoll.BI0131.BI 0.002255411255411255 0.06450216450216449
BI0153 ATCGGCGTTACA GGACTACHVGGGTWTCTAAT 16S stool samples sequenced for MrOS Vitamin D... 16S rRNA v4 GT Illumina BI MrOS Illumina MiSeq MrOS_VitaminD ... 0.0 1: Yes 1: Yes 27.3 0.0431 2:SPRING sufficiency Orwoll.BI0153.BI 0.0015787545787545787 0.07838827838827839
BI0215 CCTCTCGTGATC GGACTACHVGGGTWTCTAAT 16S stool samples sequenced for MrOS Vitamin D... 16S rRNA v4 GT Illumina BI MrOS Illumina MiSeq MrOS_VitaminD ... 0.0 1: Yes 1: Yes 33.0 0.0502 4:FALL sufficiency Orwoll.BI0215.BI 0.0015212121212121212 0.1096969696969697

5 rows × 66 columns


In [6]:
# Attach the health covariate(s) to the mapping file, matching rows on the
# '#SampleID' index of both tables.
# The join type is spelled out: an inner join keeps only samples present in
# BOTH tables, so a sample without a health record would be dropped.
df = pd.merge(mf, health, how='inner', left_index=True, right_index=True)

# Fail loudly instead of silently writing a mapping file with a different
# number of samples (dropped because of missing health records, or multiplied
# because of duplicated IDs in the health table).
assert len(df) == len(mf), (
    'merge changed the number of samples: %d in mapping file, %d after merge'
    % (len(mf), len(df))
)

In [7]:
# Report the merged table's (rows, columns) and preview its first five rows;
# the row count should match the mapping file and QLCOMP should be the new
# last column.
print(df.shape)
df.head(n=5)


(599, 67)
Out[7]:
BarcodeSequence LinkerPrimerSequence Experiment_Design_Description Library_Construction_Protocol Linker Platform Center_Name Center_Project Instrument_Model Title ... OHV1D2CT OHVD2CT OHVDTOT OHV1DTOT OHSEAS VDstatus Description ratio_activation ratio_catabolism QLCOMP
#SampleID
BI0023 TCTGGTGACATT GGACTACHVGGGTWTCTAAT 16S stool samples sequenced for MrOS Vitamin D... 16S rRNA v4 GT Illumina BI MrOS Illumina MiSeq MrOS_VitaminD ... 1: Yes 1: Yes 25.8 0.039299999999999995 3:SUMMER sufficiency Orwoll.BI0023.BI 0.0015232558139534882 0.0686046511627907 1:GOOD/EXCELLENT
BI0056 CAAGCATGCCTA GGACTACHVGGGTWTCTAAT 16S stool samples sequenced for MrOS Vitamin D... 16S rRNA v4 GT Illumina BI MrOS Illumina MiSeq MrOS_VitaminD ... 1: Yes 1: Yes 39.2 0.0619 2:SPRING sufficiency Orwoll.BI0056.BI 0.001579081632653061 0.09974489795918368 1:GOOD/EXCELLENT
BI0131 CTATTTGCGACA GGACTACHVGGGTWTCTAAT 16S stool samples sequenced for MrOS Vitamin D... 16S rRNA v4 GT Illumina BI MrOS Illumina MiSeq MrOS_VitaminD ... 1: Yes 1: Yes 23.1 0.0521 2:SPRING sufficiency Orwoll.BI0131.BI 0.002255411255411255 0.06450216450216449 1:GOOD/EXCELLENT
BI0153 ATCGGCGTTACA GGACTACHVGGGTWTCTAAT 16S stool samples sequenced for MrOS Vitamin D... 16S rRNA v4 GT Illumina BI MrOS Illumina MiSeq MrOS_VitaminD ... 1: Yes 1: Yes 27.3 0.0431 2:SPRING sufficiency Orwoll.BI0153.BI 0.0015787545787545787 0.07838827838827839 1:GOOD/EXCELLENT
BI0215 CCTCTCGTGATC GGACTACHVGGGTWTCTAAT 16S stool samples sequenced for MrOS Vitamin D... 16S rRNA v4 GT Illumina BI MrOS Illumina MiSeq MrOS_VitaminD ... 1: Yes 1: Yes 33.0 0.0502 4:FALL sufficiency Orwoll.BI0215.BI 0.0015212121212121212 0.1096969696969697 1:GOOD/EXCELLENT

5 rows × 67 columns


In [8]:
# Write the extended mapping file as tab-separated text; index=True keeps the
# '#SampleID' index as the first column of the output.
df.to_csv(
    '../data_uparse/mapping_more_MrOS.txt',
    sep='\t',
    index=True,
)

In [ ]: