In [91]:
from __future__ import division
import os
import re
import json
import numpy as np
import pandas as pd
%matplotlib nbagg
import matplotlib.pyplot as plt
import plotly.plotly as py
import plotly.graph_objs as go
In [36]:
def fig_3d(d, n):
    """Build a fixed-size 3-D surface figure from a DataFrame.

    Parameters
    ----------
    d : pandas.DataFrame
        Table whose cell values are used as the surface heights (``z``).
    n : str
        Title shown on the figure.

    Returns
    -------
    plotly.graph_objs.Figure
        A 500x500 figure (autosize disabled) holding one ``Surface`` trace.
    """
    # ``DataFrame.as_matrix()`` was deprecated in pandas 0.23 and removed in
    # 1.0; ``.values`` returns the same 2-D ndarray on every pandas version.
    surface = go.Surface(z=d.values)
    layout = go.Layout(
        title=n,
        autosize=False,
        width=500,
        height=500,
        margin=dict(l=65, r=50, b=65, t=90)
    )
    return go.Figure(data=[surface], layout=layout)
def get_reads_data(run, base_dir='/Users/codeunsolved/NGS/NGS-Dashboard/data'):
    """Load the ``frag_reads`` entry of every sample in a run into one DataFrame.

    Looks in ``<base_dir>/BRCA<run>/sample_cover`` and, for each
    sub-directory, reads ``sample_data_pointer.json`` and keeps its
    ``'frag_reads'`` value. Non-directory entries are ignored.

    Parameters
    ----------
    run : int or str
        Run identifier; formatted into the ``BRCA<run>`` directory name.
    base_dir : str, optional
        Root directory that contains the ``BRCA<run>`` folders. Defaults to
        the location originally hard-coded in this notebook, so existing
        calls keep working; pass another path to run elsewhere.

    Returns
    -------
    pandas.DataFrame
        One column per sample directory name.
        NOTE(review): the row index comes from whatever ``frag_reads``
        holds (presumably a fragment -> read-count mapping) - confirm.

    Raises
    ------
    OSError / IOError
        If the run directory or a sample's JSON file is missing.
    KeyError
        If a JSON file has no ``'frag_reads'`` key.
    """
    dir_data = os.path.join(base_dir, 'BRCA%s' % run, 'sample_cover')
    run_data = {}
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # column order reproducible across machines and pandas versions.
    for sample in sorted(os.listdir(dir_data)):
        dir_sample = os.path.join(dir_data, sample)
        if not os.path.isdir(dir_sample):
            continue
        path_sdp = os.path.join(dir_sample, 'sample_data_pointer.json')
        with open(path_sdp, 'rb') as sdp:
            # Decode explicitly so json.loads() receives text on Python 2
            # and on every Python 3 release.
            payload = json.loads(sdp.read().decode('utf-8'))
        run_data[sample] = payload['frag_reads']
    return pd.DataFrame(run_data)
def norm_data(d, option='double'):
    """Min-max scale a DataFrame along one or both axes.

    Parameters
    ----------
    d : pandas.DataFrame
        Values to rescale.
    option : {'by_s', 'by_a', 'double'}, optional
        ``'by_s'`` rescales each column to [0, 1] using that column's
        min and max; ``'by_a'`` does the same for each row (implemented by
        transposing); ``'double'`` applies ``'by_s'`` and then ``'by_a'``.
        NOTE(review): "s"/"a" presumably stand for sample/amplicon - confirm.

    Returns
    -------
    pandas.DataFrame
        Rescaled copy with the same labels as ``d``.

    Raises
    ------
    Exception
        If ``option`` is not one of the three recognised values.
    """
    if option == 'double':
        # Columns first, then rows of the column-scaled result.
        column_scaled = norm_data(d, 'by_s')
        return norm_data(column_scaled, 'by_a')

    if option == 'by_a':
        transposed = d.T
        return norm_data(transposed, option='by_s').T

    if option == 'by_s':
        lowest = d.min()
        spread = d.max() - d.min()
        return (d - lowest) / spread

    raise Exception("Unknown Option: %s" % option)
def coverage_uniforminity(d):
    """Per-column share of rows whose value exceeds 0.2x the column mean.

    Fix: the computation now uses the argument ``d``. It previously read the
    notebook-level ``data`` variable, so any other frame passed in was
    silently ignored (and the call failed if ``data`` was not defined yet).

    Parameters
    ----------
    d : pandas.DataFrame
        Numeric table; each column is scored independently.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``d.columns`` with a single column ``"0.2x"`` holding the
        percentage (0-100, rounded to 2 decimals) of rows in that column
        that are strictly greater than ``0.2 * column mean``.
    """
    def pct_above_threshold(col):
        threshold = col.mean() * 0.2
        passing = len(col[col > threshold])
        # float() keeps this a true division even where
        # ``from __future__ import division`` is not in effect.
        return round(float(passing) / len(col) * 100, 2)

    return pd.DataFrame(
        [pct_above_threshold(d[c]) for c in d.columns],
        d.columns,
        columns=["0.2x"]
    )
In [34]:
# Load the per-sample 'frag_reads' tables for run 161116 (one column per
# sample directory) and preview the first three rows.
data = get_reads_data(161116)
data.head(3)
Out[34]:
In [37]:
# Percentage of rows above 0.2x the column mean, for every column of `data`.
cu_0_2 = coverage_uniforminity(data)
# Parenthesised single-argument print behaves identically on Python 2 and is
# also valid on Python 3 (the bare `print x` statement is not).
print(cu_0_2)
In [10]:
# 3-D surface of the un-normalised reads table.
# NOTE(review): `py` is plotly.plotly (the online API) - presumably needs
# Plotly credentials configured before this cell will run; confirm.
py.iplot(fig_3d(data, 'BRCA161116'), filename='BRCA161116')
Out[10]:
In [11]:
# 3-D surface after column-wise ('by_s') min-max normalisation.
py.iplot(fig_3d(norm_data(data, 'by_s'), 'BRCA161116_norm_sample'), filename='BRCA161116_norm_sample')
Out[11]:
In [12]:
# 3-D surface after 'double' normalisation (column-wise, then row-wise).
py.iplot(fig_3d(norm_data(data, 'double'), 'BRCA161116_norm_double'), filename='BRCA161116_norm_double')
Out[12]:
In [13]:
# Line plot of the doubly-normalised values in column 'NGS161111-6-2'.
norm_data(data, 'double')['NGS161111-6-2'].plot()
Out[13]:
In [14]:
# Line plot of the doubly-normalised values in column 'NGS161111-7-2'.
norm_data(data, 'double')['NGS161111-7-2'].plot()
Out[14]:
In [111]:
def plot_brca_largeindel(d, dir_pic):
    """Plot every column of ``d`` relative to its most uniformly covered column.

    The column with the highest ``"0.2x"`` coverage-uniformity score is used
    as the reference. Every column is divided row-by-row by that reference,
    columns that fail the quality checks are dropped, and the remaining
    ratios are drawn as a violin plot (one violin per row of ``d``) followed
    by a line plot.

    Fixes relative to the previous version:
    * the reference is chosen from the argument ``d`` rather than from the
      notebook-level ``data`` variable;
    * rejected columns are collected first and dropped afterwards, instead
      of popping from the frame while iterating over it;
    * columns reported as "over amplified, poped!" are now actually
      dropped, matching the warning that is printed.

    Parameters
    ----------
    d : pandas.DataFrame
        Read counts with one column per sample.
    dir_pic : str or int
        Sub-directory name; ``./pic/<dir_pic>`` is created if missing.
        NOTE(review): nothing is written into that directory by this
        function as it stands - presumably figure saving is still to come.

    Returns
    -------
    None
    """
    def choose_one(frame):
        # Ascending sort, so the last row has the highest 0.2x uniformity.
        ranked = coverage_uniforminity(frame).sort_values(by='0.2x')
        one = ranked.iloc[-1]
        if one.values[0] < 98:
            print("[WARNING] Max Coverage Uniformity 0.2x < 98%%: %s" % one)
        return one

    def get_plot_data():
        reference = d[choose_one(d).name]
        # Transpose so the division aligns the reference on the row labels
        # of `d`, then transpose back to one column per sample.
        p_data = (d.T / reference).T
        rejected = []
        for s in p_data.columns:
            if p_data[s].max() < 0.8:
                print("[WARNING] Sample ID: %s's data quality is low, poped!" % s)
                rejected.append(s)
            elif p_data[s].min() > 1:
                print("[WARNING] Sample ID: %s's data is over amplified, poped!" % s)
                rejected.append(s)
        return p_data.drop(rejected, axis=1)

    dir_pic = os.path.join('.', 'pic/%s' % dir_pic)
    if not os.path.exists(dir_pic):
        print("[WARNING] %s doesn't exist, create it!" % dir_pic)
        os.makedirs(dir_pic)

    plot_data = get_plot_data()
    by_row = plot_data.T
    plt.violinplot([by_row[a] for a in by_row])
    plt.show()
    plot_data.plot()
In [112]:
# Draw the reference-normalised violin and line plots for the loaded run;
# './pic/161116' is created if it does not exist yet.
plot_brca_largeindel(data, '161116')