In [114]:
    
NROWS           = 1000
NROWS           = 10000
NROWS           = None
PARSE_VERBOSE   = False
#PARSE_VERBOSE   = True
REPORT_SIZE     = True
HISTOGRAM_COLOR = (0.5,0.5,0.5)
ALL_GRAPH_COLOR = (0.2,0.2,0.2)
OUTPUT_FORMAT   = 'eps'
FULL_FIG_W , FULL_FIG_H  = (19 if REPORT_SIZE else 16) , 8
CHROM_FIG_W, CHROM_FIG_H = FULL_FIG_W, 20
config = {
    'data_folder': 'probes',
    'RUN'        : 'run_BWA_JKL_23_JU_1_orig',
    'chromosome' : 'SL2.40ch12',
    'scaffold'   : 'SL2.40sc04878',
    'out_folder' : 'reports'
}
if True:
    config['BAC'            ] = 'JBSC0201'
    config['BAC_coord_start'] = 622710
    config['BAC_coord_end'  ] = 624594
    config['BAC_coord'      ] = '%012d-%012d' % ( config['BAC_coord_start'], config['BAC_coord_end'  ] )
    config['RUN'            ] = 'run_BWA_JKL_23_JU_1_orig_PROBES'
    
BAC_MODE     = False
cols_to_plot = [
        # Col name                                 limit     # ticks
        [ "K-mer Coverage"                       , [0,1]   , 5 ], 
        [ "Sequencing Coverage"                  , None    , 5 ],
        [ "Ns"                                   , [0.1,1] , 0 ],
        [ "AGP Contig"                           , [0.1,1] , 0 ],
        [ "AGP Gap"                              , [0.1,1] , 0 ],
        [ "AGP Unknown"                          , [0.1,1] , 0 ],
        [ "AGP Other"                            , [0.1,1] , 0 ],
        [ "K-mer Coverage averaged: 500 bp"      , [0,None], 2 ],
        [ "K-mer Coverage averaged: 2.5 Kbp"     , [0,None], 2 ],
        [ "K-mer Coverage averaged: 5 Kbp"       , [0,None], 2 ],
        [ "K-mer Coverage averaged: 50 Kbp"      , [0,None], 2 ],
        [ "K-mer Coverage averaged: 1 Mbp"       , [0,None], 2 ],
        [ "K-mer Coverage averaged: 5 Kbp before", [0,None], 2 ],
        [ "K-mer Coverage averaged: 5 Kbp after" , [0,None], 2 ]
    ]
if 'BAC' in config:
    BAC_MODE = True
    cols_to_plot.insert(2, ['BLAST Coverage', [0, None], 5 ])
print "Column Names"
for col_to_plot in cols_to_plot:
    print "\t", col_to_plot[0]
    
print "Config"
for cfg in sorted(config.keys()):
    print "\t%-15s: %s" % (cfg, config[cfg])
    
    
In [115]:
    
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import gridspec
import os
from IPython.display import HTML
import operator
#import pylab
#pylab.show()
%pylab inline
pd.set_option('display.notebook_repr_html', True)
#pd.set_option('display.max_columns', 20)
#pd.set_option('display.max_rows', 25)
def addHeader(level, text):
    display( HTML('''<h%(level)d>%(text)s</h%(level)d>''' % {'level':level, 'text':text}) )
    
    
    
In [116]:
    
"""
S_lycopersicum_chromosomes.fa_S_lycopersicum_scaffolds.sam.SL2.40ch12.sam.SL2.40sc06147.sam.cov
S_lycopersicum_chromosomes.fa_S_lycopersicum_scaffolds.sam.SL2.40ch12.sam.SL2.40sc06147.sam.cov.prop.cov
S_lycopersicum_chromosomes.fa_S_lycopersicum_scaffolds.sam.SL2.40ch12.sam.SL2.40sc06147.sam.pos
"""
    
KmerCoverageFile       = "%(data_folder)s/%(RUN)s/S_lycopersicum_chromosomes.fa_S_lycopersicum_scaffolds.sam.%(chromosome)s.sam.%(scaffold)s.sam.cov.prop.cov" % config
SequencingCoverageFile = "%(data_folder)s/mapping/out/S_lycopersicum_chromosomes.fa_S_lycopersicum_chromosomes.pos.%(chromosome)s.pos.cov.%(scaffold)s.cov"    % config
AgpContigFile          = "%(data_folder)s/agp/S_lycopersicum_scaffolds_from_contigs.2.40.agp.%(scaffold)s.agp.contig.agp.cov"  % config
AgpGapFile             = "%(data_folder)s/agp/S_lycopersicum_scaffolds_from_contigs.2.40.agp.%(scaffold)s.agp.gap.agp.cov"     % config
AgpOtherFile           = "%(data_folder)s/agp/S_lycopersicum_scaffolds_from_contigs.2.40.agp.%(scaffold)s.agp.other.agp.cov"   % config
AgpUnknownFile         = "%(data_folder)s/agp/S_lycopersicum_scaffolds_from_contigs.2.40.agp.%(scaffold)s.agp.unknown.agp.cov" % config
NsFile                 = "%(data_folder)s/Ns/S_lycopersicum_scaffolds.fa_NONE.tab.%(chromosome)s.tab.%(scaffold)s.tab.cov"     % config
infiles                = [KmerCoverageFile, SequencingCoverageFile, 
                          AgpContigFile, AgpGapFile, 
                          AgpUnknownFile, AgpOtherFile, NsFile]
if BAC_MODE:
    """
                                                      S_lycopersicum_chromosomes.fa_S_lycopersicum_scaffolds.sam.SL2.40ch12.SL2.40sc05611.JBPP0904_primer.000000967164-000000970727.S_lycopersicum_chromosomes.fa_S_lycopersicum_chromosomes.pos.SL2.40ch12.pos.cov.SL2.40sc05611.cov
                                                      S_lycopersicum_chromosomes.fa_S_lycopersicum_scaffolds.sam.SL2.40ch12.SL2.40sc05611.JBPP0904_primer.000000967164-000000970727.S_lycopersicum_chromosomes.fa_S_lycopersicum_scaffolds.sam.SL2.40ch12.sam.SL2.40sc05611.sam.cov.prop.cov
                                                      S_lycopersicum_chromosomes.fa_S_lycopersicum_scaffolds.sam.SL2.40ch12.SL2.40sc05611.JBPP0904_primer.000000967164-000000970727_Product.fasta.blast.cov
                                                      S_lycopersicum_chromosomes.fa_S_lycopersicum_scaffolds.sam.SL2.40ch12.SL2.40sc05611.JBPP0904_primer.000000967164-000000970727.S_lycopersicum_scaffolds_from_contigs.2.40.agp.SL2.40sc05611.agp.contig.agp.cov
                                                      S_lycopersicum_chromosomes.fa_S_lycopersicum_scaffolds.sam.SL2.40ch12.SL2.40sc05611.JBPP0904_primer.000000967164-000000970727.S_lycopersicum_scaffolds_from_contigs.2.40.agp.SL2.40sc05611.agp.gap.agp.cov
                                                      S_lycopersicum_chromosomes.fa_S_lycopersicum_scaffolds.sam.SL2.40ch12.SL2.40sc05611.JBPP0904_primer.000000967164-000000970727.S_lycopersicum_scaffolds_from_contigs.2.40.agp.SL2.40sc05611.agp.other.agp.cov
                                                      S_lycopersicum_chromosomes.fa_S_lycopersicum_scaffolds.sam.SL2.40ch12.SL2.40sc05611.JBPP0904_primer.000000967164-000000970727.S_lycopersicum_scaffolds_from_contigs.2.40.agp.SL2.40sc05611.agp.unknown.agp.cov
                                                      S_lycopersicum_chromosomes.fa_S_lycopersicum_scaffolds.sam.SL2.40ch12.SL2.40sc05611.JBPP0904_primer.000000967164-000000970727.S_lycopersicum_scaffolds.fa_NONE.tab.SL2.40ch12.tab.SL2.40sc05611.tab.cov
    """
                                                      
    config['in_base_name'] = "%(data_folder)s/%(RUN)s/S_lycopersicum_chromosomes.fa_S_lycopersicum_scaffolds.sam.%(chromosome)s.%(scaffold)s.%(BAC)s_primer.%(BAC_coord)s" % config
    KmerCoverageFile       =                                                                                                                                      "%(in_base_name)s.S_lycopersicum_chromosomes.fa_S_lycopersicum_scaffolds.sam.%(chromosome)s.sam.%(scaffold)s.sam.cov.prop.cov" % config
    SequencingCoverageFile =                                                                                                                                      "%(in_base_name)s.S_lycopersicum_chromosomes.fa_S_lycopersicum_chromosomes.pos.%(chromosome)s.pos.cov.%(scaffold)s.cov"        % config
    BlastCoverageFile      =                                                                                                                                      "%(in_base_name)s_Product.fasta.blast.cov"                                                         % config
    AgpContigFile          =                                                                                                                                      "%(in_base_name)s.S_lycopersicum_scaffolds_from_contigs.2.40.agp.%(scaffold)s.agp.contig.agp.cov"  % config
    AgpGapFile             =                                                                                                                                      "%(in_base_name)s.S_lycopersicum_scaffolds_from_contigs.2.40.agp.%(scaffold)s.agp.gap.agp.cov"     % config
    AgpOtherFile           =                                                                                                                                      "%(in_base_name)s.S_lycopersicum_scaffolds_from_contigs.2.40.agp.%(scaffold)s.agp.other.agp.cov"   % config
    AgpUnknownFile         =                                                                                                                                      "%(in_base_name)s.S_lycopersicum_scaffolds_from_contigs.2.40.agp.%(scaffold)s.agp.unknown.agp.cov" % config
    NsFile                 =                                                                                                                                      "%(in_base_name)s.S_lycopersicum_scaffolds.fa_NONE.tab.%(chromosome)s.tab.%(scaffold)s.tab.cov"    % config
    
    infiles                = [KmerCoverageFile, SequencingCoverageFile, 
                              BlastCoverageFile, AgpContigFile, 
                              AgpGapFile, AgpUnknownFile, 
                              AgpOtherFile, NsFile]
print "%-22s: %-5s %s" % ( "KmerCoverageFile"      , os.path.exists(KmerCoverageFile)      , KmerCoverageFile       )
print "%-22s: %-5s %s" % ( "SequencingCoverageFile", os.path.exists(SequencingCoverageFile), SequencingCoverageFile )
print "%-22s: %-5s %s" % ( "AgpContigFile"         , os.path.exists(AgpContigFile)         , AgpContigFile          )
print "%-22s: %-5s %s" % ( "AgpGapFile"            , os.path.exists(AgpGapFile)            , AgpGapFile             )
print "%-22s: %-5s %s" % ( "AgpOtherFile"          , os.path.exists(AgpOtherFile)          , AgpOtherFile           )
print "%-22s: %-5s %s" % ( "AgpUnknownFile"        , os.path.exists(AgpUnknownFile)        , AgpUnknownFile         )
print "%-22s: %-5s %s" % ( "NsFile"                , os.path.exists(NsFile)                , NsFile                 )
if BAC_MODE:
    print "%-22s: %-5s %s" % ( "BlastCoverageFile" , os.path.exists(BlastCoverageFile)     , BlastCoverageFile      )
print
if not all([os.path.exists(x) for x in infiles]):
    print "missing file"
    #quit()
else:
    print "all files present"
    
    
In [117]:
    
config['out_extension'] = OUTPUT_FORMAT
config['out_bn'       ] = "%(out_folder)s/%(out_extension)s/%(RUN)s_%(chromosome)s_%(scaffold)s_prop" % config
config['out_bn_img'   ] = "%(out_bn)s.%%s.%(out_extension)s"                                          % config
if BAC_MODE:
    config['out_bn'       ] = "%(out_folder)s/%(out_extension)s/%(RUN)s_%(chromosome)s_%(scaffold)s_%(BAC)s_%(BAC_coord)s" % config
    config['out_bn_img'   ] = "%(out_bn)s.%%s.%(out_extension)s"                                          % config
output_files = {
    'all_data'                        : '%(out_bn)s.csv'     % config,
    'K-mer Coverage Stats'            : config['out_bn_img'] % 'K-mer_Coverage_Stats',
    'Sequencing Coverage Stats'       : config['out_bn_img'] % 'Sequencing_Coverage_Stats',
    'K-mer Coverage Distribution'     : config['out_bn_img'] % 'K-mer_Coverage_Distribution',
    'Sequencing Coverage Distribution': config['out_bn_img'] % 'Sequencing_Coverage_Distribution',
    'Gaps Distribution'               : config['out_bn_img'] % 'Gaps_Distribution',
    'Ns Distribution'                 : config['out_bn_img'] % 'Ns_Distribution',
    'Combined graph'                  : config['out_bn_img'] % 'Combined_graph'
}
if BAC_MODE:
    output_files['BLAST Coverage Stats'       ] = config['out_bn_img'] % 'BLAST_Coverage_Stats'
    output_files['BLAST Coverage Distribution'] = config['out_bn_img'] % 'BLAST_Coverage_Distribution'
    
for of in sorted(output_files.keys()):
    print "%-32s: %s" % ( of, output_files[of] )
    
    
In [118]:
    
class size_controller(object):
    def __init__(self, w, h):
        self.w = w
        self.h = h
        
    def __enter__(self):
        self.o = rcParams['figure.figsize']
        rcParams['figure.figsize'] = self.w, self.h
        return None
    
    def __exit__(self, type, value, traceback):
        rcParams['figure.figsize'] = self.o
        
col_type_int  = np.int64
col_type_flo  = np.float64
col_type_str  = np.object
col_type_bol  = np.int8
    
In [119]:
    
col_info = [
    [ "Position"                             , col_type_int ], 
    [ "K-mer Coverage"                       , col_type_flo ], 
    [ "K-mer Coverage averaged: 500 bp"      , col_type_flo ], 
    [ "K-mer Coverage averaged: 2.5 Kbp"     , col_type_flo ], 
    [ "K-mer Coverage averaged: 5 Kbp"       , col_type_flo ], 
    [ "K-mer Coverage averaged: 50 Kbp"      , col_type_flo ], 
    [ "K-mer Coverage averaged: 1 Mbp"       , col_type_flo ], 
    [ "K-mer Coverage averaged: 5 Kbp before", col_type_flo ], 
    [ "K-mer Coverage averaged: 5 Kbp after" , col_type_flo ]
]
col_names=[cf[0] for cf in col_info]
col_types=dict(zip([c[0] for c in col_info], [c[1] for c in col_info]))
if PARSE_VERBOSE:
    print "\n".join( col_names )
    
In [120]:
    
SKIP_ROWS = 1
print KmerCoverageFile
KmerData  = pd.read_csv(KmerCoverageFile, header=None, names=col_names, dtype=col_types, nrows=NROWS, \
                        skiprows=SKIP_ROWS, verbose=PARSE_VERBOSE, delimiter="\t", comment="#")
if PARSE_VERBOSE:
    print KmerData.head()
    
print "Loaded %d rows and %d columns" % ( KmerData.shape[0], KmerData.shape[1] )
    
    
In [121]:
    
col_info = [
    [ "Position"                             , col_type_int ], 
    [ "Sequencing Coverage"                  , col_type_int ]
]
col_names=[cf[0] for cf in col_info]
col_types=dict(zip([c[0] for c in col_info], [c[1] for c in col_info]))
if PARSE_VERBOSE:
    print col_names
    col_types
    
In [122]:
    
SKIP_ROWS = 0
print SequencingCoverageFile
SequencingCoverageData  = pd.read_csv(SequencingCoverageFile, header=None, names=col_names, dtype=col_types, nrows=NROWS, \
                        skiprows=SKIP_ROWS, verbose=PARSE_VERBOSE, delimiter="\t", comment="#")
if PARSE_VERBOSE:
    print SequencingCoverageData.head()
print "Loaded %d rows and %d columns" % ( SequencingCoverageData.shape[0], SequencingCoverageData.shape[1] )
    
    
In [123]:
    
if BAC_MODE:
    addHeader(1,'BLAST')
    col_info = [
        [ "Position"                             , col_type_int ], 
        [ "BLAST Coverage"                       , col_type_flo ]
    ]
    col_names=[cf[0] for cf in col_info]
    col_types=dict(zip([c[0] for c in col_info], [c[1] for c in col_info]))
    if PARSE_VERBOSE:
        print col_names
        col_types
    
    
In [124]:
    
if BAC_MODE:
    SKIP_ROWS = 0
    
    print BlastCoverageFile
    
    BlastCoverageData  = pd.read_csv(BlastCoverageFile, header=None, names=col_names, dtype=col_types, nrows=NROWS, \
                            skiprows=SKIP_ROWS, verbose=PARSE_VERBOSE, delimiter="\t", comment="#")
    
    if PARSE_VERBOSE:
        print BlastCoverageData.head()
    
    print "Loaded %d rows and %d columns" % ( BlastCoverageData.shape[0], BlastCoverageData.shape[1] )
    
    
In [125]:
    
SKIP_ROWS = 0
    
In [126]:
    
col_info = [
    [ "Position"                    , col_type_int ], 
    [ "AGP Contig"                  , col_type_bol ]
]
col_names=[cf[0] for cf in col_info]
col_types=dict(zip([c[0] for c in col_info], [c[1] for c in col_info]))
if PARSE_VERBOSE:
    print col_names
    col_types
    
In [127]:
    
print AgpContigFile
AgpContigData  = pd.read_csv(AgpContigFile, header=None, names=col_names, dtype=col_types, nrows=NROWS, \
                        skiprows=SKIP_ROWS, verbose=PARSE_VERBOSE, delimiter="\t", comment="#")
if PARSE_VERBOSE:
    print AgpContigData.head()
    
print "Loaded %d rows and %d columns" % ( AgpContigData.shape[0], AgpContigData.shape[1] )
    
    
In [128]:
    
col_info = [
    [ "Position"                    , col_type_int ], 
    [ "AGP Gap"                     , col_type_bol ]
]
col_names=[cf[0] for cf in col_info]
col_types=dict(zip([c[0] for c in col_info], [c[1] for c in col_info]))
if PARSE_VERBOSE:
    print col_names
    col_types
    
In [129]:
    
print AgpGapFile
AgpGapData  = pd.read_csv(AgpGapFile, header=None, names=col_names, dtype=col_types, nrows=NROWS, \
                        skiprows=SKIP_ROWS, verbose=PARSE_VERBOSE, delimiter="\t", comment="#")
if PARSE_VERBOSE:
    print AgpGapData.head()
    
print "Loaded %d rows and %d columns" % ( AgpGapData.shape[0], AgpGapData.shape[1] )
    
    
In [130]:
    
col_info = [
    [ "Position"                    , col_type_int ], 
    [ "AGP Unknown"                 , col_type_bol ]
]
col_names=[cf[0] for cf in col_info]
col_types=dict(zip([c[0] for c in col_info], [c[1] for c in col_info]))
if PARSE_VERBOSE:
    print col_names
    col_types
    
In [131]:
    
print AgpUnknownFile
AgpUnknownData  = pd.read_csv(AgpUnknownFile, header=None, names=col_names, dtype=col_types, nrows=NROWS, \
                        skiprows=SKIP_ROWS, verbose=PARSE_VERBOSE, delimiter="\t", comment="#")
if PARSE_VERBOSE:
    print AgpUnknownData.head()
    
print "Loaded %d rows and %d columns" % ( AgpUnknownData.shape[0], AgpUnknownData.shape[1] )
    
    
In [132]:
    
col_info = [
    [ "Position"                    , col_type_int ], 
    [ "AGP Other"                   , col_type_bol ]
]
col_names=[cf[0] for cf in col_info]
col_types=dict(zip([c[0] for c in col_info], [c[1] for c in col_info]))
if PARSE_VERBOSE:
    print col_names
    col_types
    
In [133]:
    
print AgpOtherFile
AgpOtherData  = pd.read_csv(AgpOtherFile, header=None, names=col_names, dtype=col_types, nrows=NROWS, \
                        skiprows=SKIP_ROWS, verbose=PARSE_VERBOSE, delimiter="\t", comment="#")
if PARSE_VERBOSE:
    print AgpOtherData.head()
    
print "Loaded %d rows and %d columns" % ( AgpOtherData.shape[0], AgpOtherData.shape[1] )
    
    
In [134]:
    
col_info = [
    [ "Position"                    , col_type_int ], 
    [ "Ns"                          , col_type_bol ]
]
col_names=[cf[0] for cf in col_info]
col_types=dict(zip([c[0] for c in col_info], [c[1] for c in col_info]))
if PARSE_VERBOSE:
    print col_names
    col_types
    
In [135]:
    
print NsFile
NsData  = pd.read_csv(NsFile, header=None, names=col_names, dtype=col_types, nrows=NROWS, \
                        skiprows=SKIP_ROWS, verbose=PARSE_VERBOSE, delimiter="\t", comment="#")
if PARSE_VERBOSE:
    print NsData.head()
    
print "Loaded %d rows and %d columns" % ( NsData.shape[0], NsData.shape[1] )
    
    
In [136]:
    
Data = pd.DataFrame(KmerData, copy=True)
Data.combine_first( KmerData )
Data["Sequencing Coverage"] = SequencingCoverageData["Sequencing Coverage"]
if BAC_MODE:
    Data["BLAST Coverage"     ] = BlastCoverageData["BLAST Coverage"]
Data["AGP Contig"         ] = AgpContigData["AGP Contig"]
Data["AGP Gap"            ] = AgpGapData["AGP Gap"]
Data["AGP Unknown"        ] = AgpUnknownData["AGP Unknown"]
Data["AGP Other"          ] = AgpOtherData["AGP Other"]
Data["Ns"                 ] = NsData["Ns"]
if PARSE_VERBOSE:
    print Data.head()
print "saving to CSV"
Data.to_csv(output_files['all_data'], sep='\t', index=False)
print "Saved %d rows and %d columns" % ( Data.shape[0], Data.shape[1] )
    
    
In [137]:
    
with size_controller(FULL_FIG_W, FULL_FIG_H):
    bqc = Data.boxplot(column=["K-mer Coverage", 
                               "K-mer Coverage averaged: 500 bp",
                               "K-mer Coverage averaged: 2.5 Kbp",
                               "K-mer Coverage averaged: 5 Kbp",
                               "K-mer Coverage averaged: 50 Kbp",
                               "K-mer Coverage averaged: 1 Mbp",
                               "K-mer Coverage averaged: 5 Kbp before",
                               "K-mer Coverage averaged: 5 Kbp after"], return_type='dict', rot=45)
    plt.setp(bqc['boxes'   ], color='black')
    plt.setp(bqc['medians' ], color='black')
    plt.setp(bqc['whiskers'], color='black')
    plt.setp(bqc['fliers'  ], color='black')
    
    savefig(output_files['K-mer Coverage Stats'])
    
    
In [138]:
    
with size_controller(FULL_FIG_W, FULL_FIG_H):
    bqc = Data.boxplot(column=['Sequencing Coverage'], return_type='dict')
    plt.setp(bqc['boxes'   ], color='black')
    plt.setp(bqc['medians' ], color='black')
    plt.setp(bqc['whiskers'], color='black')
    plt.setp(bqc['fliers'  ], color='black')
    savefig(output_files['Sequencing Coverage Stats'])
    
    
In [139]:
    
if BAC_MODE:
    addHeader(2,'BLAST Coverage Stats')
    
    with size_controller(FULL_FIG_W, FULL_FIG_H):
        bqc = Data.boxplot(column=['BLAST Coverage'], return_type='dict')
        plt.setp(bqc['boxes'   ], color='black')
        plt.setp(bqc['medians' ], color='black')
        plt.setp(bqc['whiskers'], color='black')
        plt.setp(bqc['fliers'  ], color='black')
        savefig(output_files['BLAST Coverage Stats'])
    
    
    
In [140]:
    
with size_controller(FULL_FIG_W, FULL_FIG_H):
    hf = Data[ Data["K-mer Coverage"] > 0 ]['Position']
    print hf.size
    if hf.size > 0:
        hs = hf.hist(color=HISTOGRAM_COLOR)
        savefig(output_files['K-mer Coverage Distribution'])
    
    
    
In [141]:
    
with size_controller(FULL_FIG_W, FULL_FIG_H):
    hf = Data[ Data["Sequencing Coverage"] > 0 ]['Position']
    print hf.size
    if hf.size > 0:
        hs = hf.hist(color=HISTOGRAM_COLOR)
        savefig(output_files['Sequencing Coverage Distribution'])
    
    
    
In [142]:
    
if BAC_MODE:
    addHeader(2,'BLAST Coverage Distribution')
    
    with size_controller(FULL_FIG_W, FULL_FIG_H):
        hf = Data[ Data["BLAST Coverage"] > 0 ]['Position']
        print hf.size
        if hf.size > 0:
            hs = hf.hist(color=HISTOGRAM_COLOR)
            savefig(output_files['BLAST Coverage Distribution'])
    
    
    
    
In [143]:
    
with size_controller(FULL_FIG_W, FULL_FIG_H):
    hf = Data[ Data["AGP Gap"] > 0 ]['Position']
    print hf.size
    if hf.size > 0:
        hs = hf.hist(color=HISTOGRAM_COLOR)
        savefig(output_files['Gaps Distribution'])
    
    
In [144]:
    
with size_controller(FULL_FIG_W, FULL_FIG_H):
    hf = Data[ Data["Ns"] > 0 ]['Position']
    print hf.size
    if hf.size > 0:
        hs = hf.hist(color=HISTOGRAM_COLOR)
        savefig(output_files['Ns Distribution'])
    
    
In [145]:
    
num_cols = len(cols_to_plot)
with size_controller(CHROM_FIG_W, CHROM_FIG_H):
    if True:
        #fig, axes = plt.subplots(nrows=len(cols_to_plot)+2, ncols=1)
        plt.subplots_adjust(wspace=0.5, hspace=0.5)
        num_extra_rows = 2
        num_row_span   = 3
        
        if BAC_MODE:
            num_extra_rows = 3
            num_row_span   = 3
        
        num_cols_e     = num_cols + (num_extra_rows*num_row_span)
        axes           = []
        axes.append( plt.subplot2grid((num_cols_e,1), (0           , 0), rowspan=num_row_span) )
        axes.append( plt.subplot2grid((num_cols_e,1), (num_row_span, 0), rowspan=num_row_span) )
        
        
        if BAC_MODE:
            axes.append( plt.subplot2grid((num_cols_e,1), (num_row_span*2, 0), rowspan=num_row_span) )
            
        for i in xrange ( num_cols - num_extra_rows ):
            axes.append( plt.subplot2grid((num_cols_e,1), ((num_extra_rows*num_row_span)+i, 0) ) )
            
        for col_to_plot_i, col_to_plot_info in enumerate(cols_to_plot):
            col_to_plot, col_ylim, col_yticks = col_to_plot_info
            
            axis = axes[col_to_plot_i]
            Data[col_to_plot].plot(ax=axis, kind='area', stacked=False, color=ALL_GRAPH_COLOR)
            
            axis.set_title(col_to_plot)
            
            
            if col_ylim is not None:
                col_ylim_min, col_ylim_max = col_ylim
                
                if col_ylim_min is not None:
                    axis.set_ylim( bottom = col_ylim_min )
                
                if col_ylim_max is not None:
                    axis.set_ylim( top    = col_ylim_max )
            
            
            if col_yticks is not None:
                if col_yticks == 0:
                    axis.set_yticks([])
                    
                else:
                    ylim_min, ylim_max = axis.get_ylim()
                    ylim_diff = ylim_max - ylim_min
                    ylim_step = ylim_diff / (col_yticks*1.0)
                    #print col_to_plot, ylim_min, ylim_max, ylim_diff, ylim_step
                    axis.set_yticks(np.arange(ylim_min,ylim_max+ylim_step,ylim_step))
    savefig(output_files['Combined graph'])
    
    
In [146]:
    
#%install_ext https://raw.githubusercontent.com/rasbt/python_reference/master/ipython_magic/watermark.py
%reload_ext watermark
%watermark --author "Saulo Aflitos" --date --updated --python --hostname --machine --githash --packages numpy,scipy,matplotlib,pandas,IPython
    
    
In [147]:
    
#http://stackoverflow.com/questions/27934885/how-to-hide-code-from-cells-in-ipython-notebook-visualized-with-nbviewer
#from IPython.display import HTML
HTML('''<script>
code_show=true; 
function code_toggle() {
 var classes_to_hide = ['div.input', 'div.output_stderr', 'div.output_prompt', 'div.input_prompt', 'div.prompt'];
 if (code_show){
    for ( var c in classes_to_hide ) {
        $(classes_to_hide[c]).hide();
        $(classes_to_hide[c]).css('visibility', 'hidden');
    }
 } else {
    for ( var c in classes_to_hide ) {
         $(classes_to_hide[c]).show();
         $(classes_to_hide[c]).css('visibility', 'visible');
    }
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')
    
    Out[147]: