Idx Stats Report

This report provides information from the output of samtools idxstats tool. It outputs the number of mapped reads per chromosome/contig.



In [ ]:


In [1]:
from IPython.display import display, Markdown
from IPython.display import HTML
import IPython.core.display as di
import csv
import numpy as np
import zlib
import CGAT.IOTools as IOTools
import itertools as ITL
import os
import string
import pandas as pd
import sqlite3
import matplotlib as mpl
from matplotlib.backends.backend_pdf import PdfPages  # noqa: E402
#mpl.use('Agg')  # noqa: E402
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import matplotlib.font_manager as font_manager
import matplotlib.lines as mlines
from matplotlib.colors import ListedColormap
from matplotlib import cm
from matplotlib import rc, font_manager
import CGAT.Experiment as E
import math
from random import shuffle
import matplotlib as mpl
import datetime
import seaborn as sns
import nbformat
%matplotlib inline  


##################################################
#Plot customization
#plt.ioff()
plt.style.use('seaborn-white')
#plt.style.use('ggplot')
title_font = {'size':'20','color':'darkblue', 'weight':'bold', 'verticalalignment':'bottom'} # Bottom vertical alignment for more space
axis_font = {'size':'18', 'weight':'bold'}
#For summary page pdf
'''To add description page
plt.figure() 
plt.axis('off')
plt.text(0.5,0.5,"my title",ha='center',va='center')
pdf.savefig()
'''
#Panda data frame cutomization
pd.options.display.width = 80
pd.set_option('display.max_colwidth', -1)

chr_feature=['total_reads','total_mapped_reads',
                  'chr1','chr2','chr3','chr4',
                  'chr5','chr6','chr7','chr8',
                  'chr9','chr10','chr11','chr12',
                  'chr13','chr14','chr15','chr16',
                  'chr17','chr18','chr19','chrX',    
                  'chrY','chrM']
chr_index=['Total reads','Total mapped reads',
                  'chr1','chr2','chr3','chr4',
                  'chr5','chr6','chr7','chr8',
                  'chr9','chr10','chr11','chr12',
                  'chr13','chr14','chr15','chr16',
                  'chr17','chr18','chr19','chrX',    
                  'chrY','chrM']
colors_category = ['red','green','darkorange','yellowgreen', 'pink', 'gold', 'lightskyblue', 
                   'orchid','darkgoldenrod','skyblue','b', 'red', 
                   'darkorange','grey','violet','magenta','cyan',
                   'hotpink','mediumslateblue']
threshold = 5

def hover(hover_color="#ffff99"):
    return dict(selector="tr:hover",
                props=[("background-color", "%s" % hover_color)])

def y_fmt(y, pos):
    decades = [1e9, 1e6, 1e3, 1e0, 1e-3, 1e-6, 1e-9 ]
    suffix  = ["G", "M", "k", "" , "m" , "u", "n"  ]
    if y == 0:
        return str(0)
    for i, d in enumerate(decades):
        if np.abs(y) >=d:
            val = y/float(d)
            signf = len(str(val).split(".")[1])
            if signf == 0:
                return '{val:d} {suffix}'.format(val=int(val), suffix=suffix[i])
            else:
                if signf == 1:
                    #print(val, signf)
                    if str(val).split(".")[1] == "0":
                       return '{val:d} {suffix}'.format(val=int(round(val)), suffix=suffix[i]) 
                tx = "{"+"val:.{signf}f".format(signf = signf) +"} {suffix}"
                return tx.format(val=val, suffix=suffix[i])

                #return y
    return y

def getTables(dbname):
    '''
    Retrieves the names of all tables in the database.
    Groups tables into dictionaries by annotation
    '''
    dbh = sqlite3.connect(dbname)
    c = dbh.cursor()
    statement = "SELECT name FROM sqlite_master WHERE type='table'"
    c.execute(statement)
    tables = c.fetchall()
    print(tables)
    c.close()
    dbh.close()
    return 

def readDBTable(dbname, tablename):
    '''
    Reads the specified table from the specified database.
    Returns a list of tuples representing each row
    '''
    dbh = sqlite3.connect(dbname)
    c = dbh.cursor()
    statement = "SELECT * FROM %s" % tablename
    c.execute(statement)
    allresults = c.fetchall()
    c.close()
    dbh.close()
    return allresults

def getDBColumnNames(dbname, tablename):
    dbh = sqlite3.connect(dbname)
    res = pd.read_sql('SELECT * FROM %s' % tablename, dbh)
    dbh.close()
    return res.columns


def plotBar(df,samplename):
    fig, ax = plt.subplots()
    ax.set_frame_on(True)
    ax.xaxis.set_major_formatter(FuncFormatter(y_fmt))
    colors=['yellowgreen','darkorange']
    for ii in range(0,df.shape[0]):
        plt.barh(ii,df['chrX'][ii],color=colors[0], align="center",height=0.6,edgecolor=colors[0])
        plt.barh(ii,df['chrY'][ii],color=colors[1], align="center",height=0.6,edgecolor=colors[0])
    fig = plt.gcf()
    fig.set_size_inches(20,14)
    plt.yticks(fontsize =20,weight='bold')
    plt.yticks(range(df.shape[0]),df['track'])
    plt.xticks(fontsize =20,weight='bold')
    ax.grid(which='major', linestyle='-', linewidth='0.3')
    plt.ylabel("Sample",labelpad=65,fontsize =25,weight='bold')
    plt.xlabel("\nMapped reads",fontsize =25,weight='bold')
    plt.title("Reads mapped to X and Y chromosome\n",fontsize =30,weight='bold',color='darkblue')
    plt.gca().invert_yaxis()
    legend_properties = {'weight':'bold','size':'20'}
    leg = plt.legend(chr_feature[21:23],title="Contigs",prop=legend_properties,bbox_to_anchor=(1.14,0.65),frameon=True)
    leg.get_frame().set_edgecolor('k')
    leg.get_frame().set_linewidth(2)
    leg.get_title().set_fontsize(25)
    leg.get_title().set_fontweight('bold')
    plt.tight_layout()
    #plt.savefig(''.join([samplename,'.png']),bbox_inches='tight',pad_inches=0.6)
    plt.show()
    return fig

def displayTable(plotdf,name):
    # Display table
    styles = [
    hover(),
    dict(selector="th", props=[("font-size", "130%"),
                               ("text-align", "center"),
                              ]),                               
    dict(selector="td", props=[("font-size", "120%"),
                               ("text-align", "center"),
                              ]),
    dict(selector="caption", props=[("caption-side", "top"),
                                   ("text-align", "center"),
                                   ("font-size", "100%")])
    ]
    df1 = (plotdf.style.set_table_styles(styles).set_caption(name))
    display(df1)
    print("\n\n")
    
def plot_idxstats(newdf,df,samplename):
        
        fig,ax = plt.subplots()
        ax.grid(which='major', linestyle='-', linewidth='0.25')
        ax.yaxis.set_major_formatter(FuncFormatter(y_fmt))
        index=list(range(newdf.shape[1]))
        colors = plt.cm.plasma(np.linspace(0,1,newdf.shape[0]))
        for ii in range(0,newdf.shape[0]):
            plt.plot(index,newdf.iloc[ii],linewidth=2,color=colors[ii],linestyle="-",marker='o',fillstyle='full',markersize=8)
        fig = plt.gcf()
        fig.set_size_inches(11,8)
        plt.xticks(index,chr_feature[2:24],fontsize = 14,weight='bold')
        plt.yticks(fontsize = 14,weight='bold')
        labels = ax.get_xticklabels()
        plt.setp(labels, rotation=40)
        legend_properties = {'weight':'bold','size':'14'}
        leg = plt.legend(df['track'],title="Sample",prop=legend_properties,bbox_to_anchor=(1.42,1.01),frameon=True)
        leg.get_frame().set_edgecolor('k')
        leg.get_frame().set_linewidth(2)
        leg.get_title().set_fontsize(16)
        leg.get_title().set_fontweight('bold')
        plt.xlabel('\nContigs',**axis_font)
        plt.ylabel('Mapped Reads',**axis_font,labelpad=40)
        plt.title("Mapped reads per contig", **title_font)
        plt.tight_layout()
        #plt.savefig(''.join([samplename,'.png']),bbox_inches='tight',pad_inches=0.6)
        print("\n\n")
        plt.show()
        return fig
        
def idxStatsReport(dbname, tablename):
    trans = pd.DataFrame(readDBTable(dbname,tablename))
    trans.columns = getDBColumnNames(dbname,tablename)
    df=trans
    #print(df)
    #newdf = df[df.columns[0:25]] 
    newdf = df[chr_feature[2:24]]
    #print(newdf)
    plotdf = df[chr_feature]
    plotdf.columns = chr_index
    plotdf.index = [df['track']]
    #del plotdf.index.name
    #pdf=PdfPages("idx_stats_summary.pdf")
    displayTable(plotdf,"Idx Full Stats")
    fig = plot_idxstats(newdf,df,"idx_full_stats")
    #pdf.savefig(fig,bbox_inches='tight',pad_inches=0.6)
    print("\n\n\n")
    fig = plotBar(df,"idxStats_X_Y_mapped_reads")
    #pdf.savefig(fig,bbox_inches='tight',pad_inches=0.6)
    #pdf.close()
#getTables("csvdb")
idxStatsReport("../csvdb","idxstats_reads_per_chromosome")


Idx Full Stats
Total reads Total mapped reads chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chrX chrY chrM
track
('Brain-F1-R1',) 1192074 1192074 34913 21767 13563 20154 29136 18760 20359 7114 15194 5732 11620 19789 7453 4012 14348 9412 9927 4175 890182 23373 842 0
1241349 1241349 35718 22270 13618 21016 30095 19362 21023 7319 15779 5709 11751 20190 7530 3921 15030 9355 9920 4268 932120 24184 878 0
1137384 1137384 31529 19892 12075 18033 26413 17041 17941 6733 13932 5020 10377 18013 6578 3517 13323 8654 8792 3800 864271 21667 710 1
1256621 1256621 34697 21556 12877 20321 29359 18698 19871 7066 15249 5447 11467 19807 7080 3721 14825 9242 9456 4125 957180 24259 841 0
806848 806848 15465 11070 6746 7595 8725 7366 8116 4455 6400 4592 5044 7987 2903 2839 5341 5646 5545 2083 674575 7221 754 0
892148 892148 15843 11609 6090 7562 8726 7408 7811 4537 6419 4070 4956 7895 2839 2673 5324 5435 5014 2045 761597 7582 730 0








Created with Jupyter,by Reshma.