In [10]:
import os

import pandas as pd
import seaborn as sns


/home/jmatsen/miniconda2/lib/python2.7/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.
  warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')

Get all the dirs with the .flagstat files:


In [9]:
for root, dirs, files in os.walk('../../workspace/'):
     for file in files:
            if ".flagstat" in file:
                print file


LakWasMet40_LOW7_2.sorted.bam.flagstat
LakWasMe81_HOW10_2.sorted.bam.flagstat
LakWasMet58_HOW8_2.sorted.bam.flagstat
LakWasMet52_LOW8_2.sorted.bam.flagstat
LakWasMe92_HOW11_2.sorted.bam.flagstat
LakWasM121_LOW14_2.sorted.bam.flagstat
LakWasMet25_LOW6_2.sorted.bam.flagstat
LakWasMe85_LOW11_2.sorted.bam.flagstat
LakWasM127_HOW14_2.sorted.bam.flagstat
LakWasM130_HOW14_2.sorted.bam.flagstat
LakWasMet31_HOW6_2.sorted.bam.flagstat
LakWasMet51_LOW8_2.sorted.bam.flagstat
LakWasMe74_LOW10_2.sorted.bam.flagstat
LakWasMeta8_HOW4_2.sorted.bam.flagstat
LakWasMe93_HOW11_2.sorted.bam.flagstat
LakWasMet56_HOW8_2.sorted.bam.flagstat
LakWasMet26_LOW6_2.sorted.bam.flagstat
LakWasM110_LOW13_2.sorted.bam.flagstat
LakWasMe79_HOW10_2.sorted.bam.flagstat
LakWasMe86_LOW11_2.sorted.bam.flagstat
LakWasM104_HOW12_2.sorted.bam.flagstat
LakWasMe80_HOW10_2.sorted.bam.flagstat
LakWasMeta4_LOW4_2.sorted.bam.flagstat
LakWasMe97_LOW12_2.sorted.bam.flagstat
LakWasMeta2_LOW4_2.sorted.bam.flagstat
LakWasMe94_HOW11_2.sorted.bam.flagstat
LakWasM111_LOW13_2.sorted.bam.flagstat
LakWasMe75_LOW10_2.sorted.bam.flagstat
LakWasMet39_LOW7_2.sorted.bam.flagstat
LakWasM109_LOW13_2.sorted.bam.flagstat
LakWasMet70_HOW9_2.sorted.bam.flagstat
LakWasMet19_HOW5_2.sorted.bam.flagstat
LakWasMeta3_LOW4_2.sorted.bam.flagstat
LakWasMet57_HOW8_2.sorted.bam.flagstat
LakWasM128_HOW14_2.sorted.bam.flagstat
LakWasMeta7_HOW4_2.sorted.bam.flagstat
LakWasMet63_LOW9_2.sorted.bam.flagstat
LakWasMet15_LOW5_2.sorted.bam.flagstat
LakWasM106_HOW12_2.sorted.bam.flagstat
LakWasMet27_LOW6_2.sorted.bam.flagstat
LakWasMet49_LOW8_2.sorted.bam.flagstat
LakWasMet20_HOW5_2.sorted.bam.flagstat
LakWasMet28_LOW6_2.sorted.bam.flagstat
LakWasMet50_LOW8_2.sorted.bam.flagstat
LakWasMet37_LOW7_2.sorted.bam.flagstat
LakWasMet38_LOW7_2.sorted.bam.flagstat
LakWasM118_HOW13_2.sorted.bam.flagstat
LakWasMet16_LOW5_2.sorted.bam.flagstat
LakWasM115_HOW13_2.sorted.bam.flagstat
LakWasMet33_HOW6_2.sorted.bam.flagstat
LakWasM123_LOW14_2.sorted.bam.flagstat
LakWasM117_HOW13_2.sorted.bam.flagstat
LakWasMet21_HOW5_2.sorted.bam.flagstat
LakWasMet22_HOW5_2.sorted.bam.flagstat
LakWasMe82_HOW10_2.sorted.bam.flagstat
LakWasM124_LOW14_2.sorted.bam.flagstat
LakWasMet13_LOW5_2.sorted.bam.flagstat
LakWasMe76_LOW10_2.sorted.bam.flagstat
LakWasM105_HOW12_2.sorted.bam.flagstat
LakWasMe73_LOW10_2.sorted.bam.flagstat
LakWasM116_HOW13_2.sorted.bam.flagstat
LakWasMet43_HOW7_2.sorted.bam.flagstat
LakWasMet10_HOW4_2.sorted.bam.flagstat
LakWasMet55_HOW8_2.sorted.bam.flagstat
LakWasMet32_HOW6_2.sorted.bam.flagstat
LakWasMe98_LOW12_2.sorted.bam.flagstat
LakWasMeta1_LOW4_2.sorted.bam.flagstat
LakWasMet62_LOW9_2.sorted.bam.flagstat
LakWasMet69_HOW9_2.sorted.bam.flagstat
LakWasM129_HOW14_2.sorted.bam.flagstat
LakWasMe91_HOW11_2.sorted.bam.flagstat
LakWasM122_LOW14_2.sorted.bam.flagstat
LakWasMet14_LOW5_2.sorted.bam.flagstat
LakWasMe99_LOW12_2.sorted.bam.flagstat
LakWasMet61_LOW9_2.sorted.bam.flagstat
LakWasM100_LOW12_2.sorted.bam.flagstat
LakWasMet34_HOW6_2.sorted.bam.flagstat
LakWasMet64_LOW9_2.sorted.bam.flagstat
LakWasMet67_HOW9_2.sorted.bam.flagstat
LakWasMet44_HOW7_2.sorted.bam.flagstat
LakWasM112_LOW13_2.sorted.bam.flagstat
LakWasMeta9_HOW4_2.sorted.bam.flagstat
LakWasMet45_HOW7_2.sorted.bam.flagstat

In [15]:
!ls "../../workspace/LakWasMet57_HOW8_2/bwa/"


aln			       LakWasMet57_HOW8_2.sorted.bam.bai
aln.log			       LakWasMet57_HOW8_2.sorted.bam.flagstat
database		       LakWasMet57_HOW8_2.summary.dat
database.log		       samtools
database.sql		       samtools.log
LakWasMet57_HOW8_2.sorted.bam

In [18]:
pd.read_csv('../../workspace/LakWasMet57_HOW8_2/bwa/LakWasMet57_HOW8_2.sorted.bam.flagstat',
           sep='\t')


Out[18]:
38960894 + 0 in total (QC-passed reads + QC-failed reads)
0 453676 + 0 secondary
1 0 + 0 supplementary
2 0 + 0 duplicates
3 22041367 + 0 mapped (56.57% : N/A)
4 38507218 + 0 paired in sequencing
5 19253609 + 0 read1
6 19253609 + 0 read2
7 18552612 + 0 properly paired (48.18% : N/A)
8 20324822 + 0 with itself and mate mapped
9 1262869 + 0 singletons (3.28% : N/A)
10 1741360 + 0 with mate mapped to a different chr
11 1459081 + 0 with mate mapped to a different ch...

In [ ]:


In [7]:
os.listdir("../../workspace/LakWasMet57_HOW8_2/bwa/")


Out[7]:
['samtools.log',
 'LakWasMet57_HOW8_2.sorted.bam.bai',
 'aln',
 'LakWasMet57_HOW8_2.sorted.bam',
 'LakWasMet57_HOW8_2.sorted.bam.flagstat',
 'LakWasMet57_HOW8_2.summary.dat',
 'samtools',
 'database',
 'database.log',
 'database.sql',
 'aln.log']

In [ ]: