In [1]:
import myvariant, mygene, os, plotly, json, textwrap, re, glob, pickle
from IPython.display import IFrame
#from wand.image import Image as WImage
from networkx.readwrite import json_graph
import plotly
plotly.offline.init_notebook_mode()
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot
from IPython.core.display import display, HTML
from ipywidgets import interact, interactive, fixed,Layout, Button, Box
import ipywidgets as widgets
from collections import OrderedDict
#def out_sad_fig(G):
#plotly plotting function for networkx graphs
def in_json_out_plotly_fig(json_name, make_3D, global_in):
    """Draw a node-link JSON graph as an interactive plotly figure.

    The figure is rendered inline with ``plotly.offline.iplot``; nothing is
    returned.

    Parameters
    ----------
    json_name : str
        Path to a JSON file readable by ``json_graph.node_link_graph``.
        Every node must carry ``x`` and ``y`` attributes (and ``z`` when
        ``make_3D`` is truthy). Optional node attributes ``color``, ``size``
        and ``opacity`` default to ``"white"``, ``10`` and ``1``.
    make_3D : bool
        When truthy the graph is drawn with ``Scatter3d`` traces, otherwise
        with 2D ``Scatter`` traces.
    global_in : bool
        When truthy a 240x240 figure titled "Whole Nucleus" is produced.
        Otherwise an 800x400 figure is produced whose title links to
        ``G.graph["ucsc_session"]`` (a ``KeyError`` is raised if that graph
        attribute is missing).
    """
    # Load JSON graph
    with open(json_name, "r") as json_comm:
        G = json_graph.node_link_graph(json.load(json_comm))

    # One axis style, shared by every axis, that hides all axis decoration.
    axis = dict(showbackground=False,
                showline=False,
                zeroline=False,
                showgrid=False,
                showticklabels=False,
                title="")

    # The two layouts only differ in size, top margin and title.
    if global_in:
        fig_width, fig_height, top_margin = 240, 240, 50
        title = "Whole Nucleus"
    else:
        fig_width, fig_height, top_margin = 800, 400, 0
        title = "<br> <a href=\"" + G.graph["ucsc_session"] + "\"> UCSC Browser View </a>"
    layout = go.Layout(
        width=fig_width,
        height=fig_height,
        xaxis=go.XAxis(axis),
        yaxis=go.YAxis(axis),
        showlegend=False,
        scene=go.Scene(
            xaxis=go.XAxis(axis),
            yaxis=go.YAxis(axis),
            zaxis=go.ZAxis(axis)),
        margin=go.Margin(l=0, r=0, b=0, t=top_margin),
        hovermode="closest")
    layout["title"] = title

    # The 2D and 3D variants differ only in trace class and coordinate count.
    dims = ("x", "y", "z") if make_3D else ("x", "y")
    trace_cls = go.Scatter3d if make_3D else go.Scatter

    # Node attribute lookup built from the (node, attrs) pairs, so the code
    # does not depend on the ``G.node`` attribute (absent in newer networkx).
    attrs_by_node = dict(G.nodes(data=True))

    # --- node trace -------------------------------------------------------
    node_pos = dict((dim, []) for dim in dims)
    node_text, node_color, node_size, node_opacity = [], [], [], []
    for node_id, attrs in G.nodes(data=True):
        node_text.append(_node_hover_text(node_id, attrs))
        for dim in dims:
            node_pos[dim].append(attrs[dim])
        node_color.append(attrs.get("color", "white"))
        node_size.append(attrs.get("size", 10))
        node_opacity.append(attrs.get("opacity", 1))
    traceN = trace_cls(
        mode="markers",
        name="",
        hoverinfo="text",
        text=node_text,
        marker=go.Marker(color=node_color, size=node_size, opacity=node_opacity),
        **node_pos)

    # --- edge trace and edge annotations ----------------------------------
    edge_line = dict((dim, []) for dim in dims)   # segment end points, None-separated
    edge_mid = dict((dim, []) for dim in dims)    # one midpoint marker per edge
    edge_text = []
    for src, dst, edge_attrs in G.edges(data=True):
        src_attrs = attrs_by_node[src]
        dst_attrs = attrs_by_node[dst]
        for dim in dims:
            # The trailing None breaks the line so edges are not chained together.
            edge_line[dim] += [src_attrs[dim], dst_attrs[dim], None]
            edge_mid[dim].append((float(src_attrs[dim]) + float(dst_attrs[dim])) / 2.0)
        # Always append (possibly empty) text so that text[i] stays aligned
        # with midpoint i; skipping empty entries shifts every later label
        # onto the wrong edge.
        edge_text.append(_edge_hover_text(edge_attrs))
    edge_count = len(edge_text)
    traceE = trace_cls(
        mode="lines",
        hoverinfo="none",
        name="",
        line=dict(width=1),
        **edge_line)
    traceE_annot = trace_cls(
        name="",
        mode="markers",
        hoverinfo="text",
        text=edge_text,
        marker=go.Marker(color=["black"] * edge_count,
                         size=[5] * edge_count,
                         opacity=[]),
        **edge_mid)

    data = go.Data([traceE, traceN, traceE_annot])
    fig = go.Figure(data=data, layout=layout)
    plotly.offline.iplot(fig)


# (attribute key, hover label) pairs appended to a node's hover text, in order.
_NODE_ANNOTATION_LABELS = (
    ("cadd", "CADD Score: "),
    ("deep_score", "DeepBind Delta: "),
    ("deepbind_tf", "DeepBind Top TFs: "),
    ("grasp_pheno", "GWAS Phenotype (GRASP): "),
    ("grasp_pmid", "GWAS PMID (GRASP): "),
    ("gtex_eqtl_pval", "GTEX eQTL P-value: "),
    ("sad_abs_sum", "Sum of SAD across tissues: "),
)


def _node_hover_text(node_id, attrs):
    """Build the ``<br>``-separated hover text for one node.

    ``node_id`` is concatenated with strings, so it is expected to be a
    string itself.
    """
    if "rsid" in attrs:
        text = "rsid: " + attrs["rsid"] + "\n" + attrs["name"]
    elif "name" in attrs and "fpkm" in attrs:
        text = node_id + " " + attrs["name"]
    elif "name" in attrs:
        text = attrs["name"]
    else:
        text = node_id
    if "fpkm" in attrs:
        text += "\nFPKM: " + str(attrs["fpkm"])
    if "tfbs" in attrs:
        text += "\n" + textwrap.fill("TFBS: " + attrs["tfbs"])
    for key, label in _NODE_ANNOTATION_LABELS:
        if key in attrs:
            text += "\n" + textwrap.fill(label + str(attrs[key]))
    # Line breaks (including the ones textwrap inserted) are written as <br>.
    return text.replace("\n", "<br>")


def _edge_hover_text(edge_attrs):
    """Build the ``<br>``-separated hover text for one edge ("" if unannotated)."""
    text = ""
    if "weight" in edge_attrs:
        text += "Weight: " + str(edge_attrs["weight"])
    if "overlapped_tf" in edge_attrs:
        text += "<br>" + textwrap.fill("Deepbind TF overlap: " + str(edge_attrs["overlapped_tf"]))
    if "overlapped_tf_enc" in edge_attrs:
        text += "<br>" + textwrap.fill("Encode TF overlap: " + str(edge_attrs["overlapped_tf_enc"]))
    # textwrap.fill wraps with "\n"; convert so edges match the node hover text.
    return text.replace("\n", "<br>")
In [7]:
#Run case
def update_run_name(change):
    """Repopulate the run dropdown for a newly selected cell type.

    ``change`` is the widget change dict; ``change["new"]`` is used as the
    sub-directory of ``example_output/output_IN_graphs/`` whose entries
    become the options of ``name_picker``.
    """
    run_paths = glob.glob("example_output/output_IN_graphs/" + change["new"] + "/*")
    run_labels = list(map(os.path.basename, run_paths))
    # A trailing None entry gives the dropdown a dummy option to switch to,
    # so that it renders.
    run_labels.append(None)
    name_picker.options = run_labels
def update_global_graph(change):
    """Load the global graph of the newly selected run and refresh the pickers.

    ``change`` is the widget change dict. When ``change["new"]`` is None (the
    dummy dropdown entry) nothing happens. Otherwise the run's
    ``*_global_graph_annotated.json`` is read, ``in_picker`` and
    ``sad_table_wid`` receive one option per node (label -> JSON path), the
    global graph is drawn in 3D and the graph-level attributes are printed.
    """
    run_name = change["new"]
    if run_name is None:
        return

    new_out_dir = "example_output/output_IN_graphs/" + cell_picker.value + "/" + run_name + "/"
    cur_global_graph = new_out_dir + run_name + "_global_graph_annotated.json"
    with open(cur_global_graph, "r") as json_comm:
        draft_genome_out_annot = json_graph.node_link_graph(json.load(json_comm))

    # Map the second space-separated token of each node name to that node's
    # JSON file inside the run directory.
    in_names_2json = dict()
    for _, node_attrs in draft_genome_out_annot.nodes(data=True):
        label = node_attrs["name"].split(" ")[1]
        in_names_2json[label] = new_out_dir + node_attrs["json_name"]

    # NOTE(review): labels are ordered by their JSON path, descending -
    # confirm this is the intended dropdown order.
    ordered_labels = sorted(in_names_2json, key=in_names_2json.get, reverse=True)
    sorted_IN_jsons = OrderedDict((label, in_names_2json[label]) for label in ordered_labels)
    in_picker.options = sorted_IN_jsons
    sad_table_wid.options = sorted_IN_jsons

    # Every argument is fixed, so this just draws the global graph once.
    interact(in_json_out_plotly_fig,
             json_name=fixed(cur_global_graph),
             make_3D=fixed(True),
             global_in=fixed(True))

    # Print attributes
    graph_attrs = draft_genome_out_annot.graph
    print("Species: " + graph_attrs["species"])
    print("Genome Version: " + graph_attrs["genome_version"])
    print("Cell Type: " + graph_attrs["tissue_type"])
def draw_sad_table(in_json_name):
    """Plot the SAD table stored in an IN graph and return the top-SNP plot paths.

    Parameters
    ----------
    in_json_name : str
        Path to a node-link JSON graph whose graph-level ``sad_table``
        attribute holds a pickled table exposing ``.values``, ``.index`` and
        ``.columns.values`` (presumably a pandas DataFrame - TODO confirm).

    Returns
    -------
    tuple
        ``(sad_ref, sad_mut)``: the ``sad_mut_ref`` and ``sad_mut_alt`` file
        names of the node carrying ``top_open_snp``, prefixed with the
        directory of ``in_json_name``. If several nodes carry
        ``top_open_snp`` the last one wins. Both entries are ``None`` when no
        node carries it (instead of failing with ``UnboundLocalError`` after
        the heatmap has already been drawn).
    """
    out_dir = os.path.dirname(in_json_name) + "/"

    with open(in_json_name, "r") as json_comm:
        in_annot_graph = json_graph.node_link_graph(json.load(json_comm))

    # SECURITY: pickle.loads can execute arbitrary code contained in the
    # payload. Only open graph JSON files that come from a trusted pipeline.
    unserial_sad_table = pickle.loads(in_annot_graph.graph["sad_table"])

    # Locate the plot files of the top SNP; only these two paths are needed,
    # so no other node attribute is read.
    sad_ref = None
    sad_mut = None
    for _, node_attrs in in_annot_graph.nodes(data=True):
        if "top_open_snp" in node_attrs:
            sad_mut = out_dir + node_attrs["sad_mut_alt"]
            sad_ref = out_dir + node_attrs["sad_mut_ref"]

    data = [
        go.Heatmap(
            z=unserial_sad_table.values,
            y=unserial_sad_table.index,
            x=unserial_sad_table.columns.values,
            colorscale="spectral",
        )
    ]
    layout = go.Layout(
        title='Basset SAD Prediction',
        xaxis=dict(ticks=''),
        yaxis=dict(ticks='')
    )
    fig = go.Figure(data=data, layout=layout)
    plotly.offline.iplot(fig)
    return (sad_ref, sad_mut)
# Entries directly under the output directory are offered as cell types.
cell_types = list(map(os.path.basename, glob.glob("example_output/output_IN_graphs/*")))

# picking cell type and run
in_args = {}
cell_picker = widgets.Dropdown(options=cell_types,
                               value=cell_types[0],
                               description='Cell type:')

# Runs available for the initially selected (first) cell type, followed by a
# None entry, mirroring what update_run_name produces.
name_types = list(map(os.path.basename,
                      glob.glob("example_output/output_IN_graphs/" + cell_types[0] + "/*")))
name_types.append(None)
name_picker = widgets.Dropdown(options=name_types,
                               value=name_types[0],
                               description='Run:')

# Placeholder options; update_global_graph replaces them once a run is picked.
in_picker = widgets.Dropdown(options=["No Run Picked"],
                             description='Pick IN:')
sad_table_wid = widgets.Dropdown(options=["NA"],
                                 description="SAD Table")

# Changing the cell type refreshes the run list; changing the run reloads the
# global graph. in_picker and sad_table_wid are not linked to each other.
cell_picker.observe(update_run_name, 'value')
name_picker.observe(update_global_graph, 'value')

display(cell_picker)
display(name_picker)
In [3]:
%%javascript
// HiGlass view configuration. It is passed unchanged to
// hglib.createHgComponent by the require() callback at the end of this cell.
var config = {
"editable": true,
"zoomFixed": false,
"trackSourceServers": [
"http://higlass.io/api/v1"
],
"exportViewUrl": "http://higlass.io/api/v1/viewconfs/",
// A single view (uid "aa"): annotation tracks on the top and left edges and
// one combined heatmap track in the center; "right" and "bottom" are empty.
"views": [
{
"uid": "aa",
// NOTE(review): presumably the initial x-range in genome-wide coordinates -
// confirm against the HiGlass view-config documentation.
"initialXDomain": [
509161199.6251172,
2590838800.374883
],
"autocompleteSource": "http://higlass.io/api/v1/suggest/?d=OHJakQICQD6gTD7skx4EWA&",
"genomePositionSearchBoxVisible": true,
"chromInfoPath": "//s3.amazonaws.com/pkerp/data/hg19/chromSizes.tsv",
"tracks": {
// Top edge: hg19 gene annotations followed by chromosome labels.
"top": [
{
"type": "horizontal-gene-annotations",
"height": 60,
"tilesetUid": "OHJakQICQD6gTD7skx4EWA",
"server": "http://higlass.io/api/v1",
"position": "top",
"uid": "OHJakQICQD6gTD7skx4EWA",
"name": "Gene Annotations (hg19)",
"options": {
"name": "Gene Annotations (hg19)"
},
"maxWidth": 4294967296,
"maxZoom": 22
},
{
"chromInfoPath": "//s3.amazonaws.com/pkerp/data/hg19/chromSizes.tsv",
"type": "horizontal-chromosome-labels",
"position": "top",
"name": "Chromosome Labels (hg19)",
"height": 30,
"uid": "N_w4XVIpTPSpq2xBVTToLA",
"options": {}
}
],
// Left edge: the vertical counterparts of the two top tracks (same
// gene-annotation tileset, same chromSizes file).
"left": [
{
"type": "vertical-gene-annotations",
"width": 60,
"tilesetUid": "OHJakQICQD6gTD7skx4EWA",
"server": "http://higlass.io/api/v1",
"position": "left",
"name": "Gene Annotations (hg19)",
"options": {
"labelPosition": "bottomRight",
"name": "Gene Annotations (hg19)"
},
"uid": "BtHSrxzrSUy21KQ7D6lmZA",
"maxWidth": 4294967296,
"maxZoom": 22
},
{
"chromInfoPath": "//s3.amazonaws.com/pkerp/data/hg19/chromSizes.tsv",
"type": "vertical-chromosome-labels",
"position": "left",
"name": "Chromosome Labels (hg19)",
"width": 30,
"uid": "V74xYbctSbKB2XSesy2Glg",
"options": {}
}
],
// Center: a "combined" track wrapping a single heatmap
// ("Rao et al. (2014) GM12878 MboI (allreps) 1kb") drawn with a
// white -> yellow -> orange -> red colour range.
"center": [
{
"uid": "c1",
"type": "combined",
"height": 200,
"contents": [
{
"server": "http://higlass.io/api/v1",
"tilesetUid": "CQMd6V_cRw6iCI_-Unl3PQ",
"type": "heatmap",
"position": "center",
"options": {
"colorRange": [
"#FFFFFF",
"#F8E71C",
"#F5A623",
"#D0021B"
],
"maxZoom": null,
"labelPosition": "bottomRight",
"name": "Rao et al. (2014) GM12878 MboI (allreps) 1kb"
},
"uid": "QGf8KgUsSPCoVlk-EPWn3w",
"name": "Rao et al. (2014) GM12878 MboI (allreps) 1kb",
"maxWidth": 4194304000,
"binsPerDimension": 256,
"maxZoom": 14
}
],
"position": "center",
"options": {}
}
],
"right": [],
"bottom": []
},
// NOTE(review): presumably the initial y-range, in the same coordinate
// system as initialXDomain - confirm.
"initialYDomain": [
619564198.6879103,
2480435801.312089
],
// Grid placement of the view; "i" repeats the view uid "aa".
"layout": {
"w": 6,
"h": 12,
"x": 0,
"y": 0,
"i": "aa",
"moved": false,
"static": false
}
}
],
// No zoom or location locks are defined.
"zoomLocks": {
"locksByViewUid": {},
"locksDict": {}
},
"locationLocks": {
"locksByViewUid": {},
"locksDict": {}
}
};
// Add the container that will host the HiGlass component to this cell's output.
element.append('<div><div id="higlass" class="full-dim"></div></div>');

// Tell RequireJS where to fetch hglib from (module load timeout: 2 seconds).
requirejs.config({
    paths: { "hglib": "//higlass.io/assets/scripts-third-party/hglib" },
    waitSeconds: 2
});

// Once hglib is loaded, create the component inside the container and keep
// the returned API object on window.higlassApi.
require(['hglib'], function (hglib) {
    var container = document.querySelector("#higlass");
    var componentOptions = { bounded: true };
    var storeApi = function (api) {
        window.higlassApi = api;
    };
    hglib.createHgComponent(container, config, componentOptions, storeApi);
});
In [8]:
# Viewer for a single IN graph: the graph file comes from the in_picker
# dropdown, make_3D starts as False and global_in is pinned to False.
interact(
    in_json_out_plotly_fig,
    json_name=in_picker,
    make_3D=False,
    global_in=fixed(False)
);
In [20]:
# Drive draw_sad_table from the SAD Table dropdown. The widget is kept in
# sad_tab_out so later cells can read the function's return value from
# sad_tab_out.result.
sad_tab_out = interactive(
    draw_sad_table,
    in_json_name=sad_table_wid)
display(sad_tab_out)
In [21]:
# Basset in-silico mutagenesis: embed the first path returned by
# draw_sad_table (the "sad_mut_ref" file).
from IPython.display import IFrame
IFrame(src=sad_tab_out.result[0], width=600, height=300)
Out[21]:
In [22]:
# Embed the second path returned by draw_sad_table (the "sad_mut_alt" file).
IFrame(src=sad_tab_out.result[1], width=600, height=300)
Out[22]: