Making a Bokeh dendrogram
In [1]:
%matplotlib inline
import numpy as np
from scipy.cluster.hierarchy import linkage, dendrogram
import matplotlib.pyplot as plt
import csv, sys
In [3]:
# Relative path of the matrix file that the following cells load and cluster.
# The literal is split into adjacent pieces only for readability; Python
# concatenates them into the same single string.
file_name = (
    "data/"
    "ALL_noSORT.dex1hr_observed_nuTSS_all"
    ".wgEncodeBroadHistoneA549ControlDex100nmSig"
    ".2000-center-2000-20.matrix"
)
In [4]:
# Parse the matrix file as floats, skipping its first line and keeping only
# columns 7 through 205 (zero-based, end-exclusive range(7, 206)).
data_array = np.loadtxt(file_name, usecols=list(range(7, 206)), skiprows=1)

# Work on the first 100 rows only.
subset = data_array[:100]
In [5]:
# Hierarchical clustering of `subset`. The method and metric are written out
# explicitly; they are the values scipy uses when none are given.
lnk = linkage(subset, method="single", metric="euclidean")
In [6]:
# Lookup table handed to link_color_func: every entry is "k" (matplotlib's
# black), so every link of the dendrogram is drawn in the same colour.
colors = ["k"] * (2 * len(subset))

# `dg` keeps the plot coordinates (icoord / dcoord) that later cells reuse.
dg = dendrogram(
    lnk,
    orientation="right",
    labels=None,
    link_color_func=lambda k: colors[k],
)
plt.show()
/Users/shapiroaj4/.virtualenvs/genomics/lib/python2.7/site-packages/matplotlib/collections.py:590: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
if self._edgecolors == str('face'):
In [7]:
# Every coordinate is subtracted from the largest value on its axis, which
# mirrors the dendrogram along both axes.
max_x = np.amax(dg['icoord'])
max_y = np.amax(dg['dcoord'])

# Each link is stored as four points. Consecutive points form the three line
# segments of the link: points 0, 1, 2 are segment starts (the *_0 lists) and
# points 1, 2, 3 are the matching segment ends (the *_1 lists).
segment_starts = (0, 1, 2)
segment_ends = (1, 2, 3)

x_val_0 = [max_x - xs[j] for xs in dg['icoord'] for j in segment_starts]
y_val_0 = [max_y - ys[j] for ys in dg['dcoord'] for j in segment_starts]
x_val_1 = [max_x - xs[j] for xs in dg['icoord'] for j in segment_ends]
y_val_1 = [max_y - ys[j] for ys in dg['dcoord'] for j in segment_ends]
In [8]:
import json

# Pretty-print the dendrogram dictionary to inspect its structure.
# The call is parenthesised so the line is valid on both Python 2 and
# Python 3 (the bare `print expr` statement is a SyntaxError on Python 3).
print(json.dumps(dg, indent=2))
{
"ivl": [
"87",
"82",
"86",
"79",
"31",
"34",
"94",
"83",
"80",
"93",
"47",
"89",
"85",
"35",
"10",
"84",
"38",
"90",
"25",
"66",
"70",
"16",
"88",
"6",
"61",
"51",
"29",
"50",
"78",
"0",
"24",
"41",
"54",
"95",
"59",
"14",
"33",
"71",
"69",
"49",
"46",
"37",
"64",
"44",
"8",
"73",
"58",
"30",
"39",
"32",
"98",
"72",
"42",
"77",
"36",
"67",
"92",
"1",
"63",
"12",
"60",
"4",
"75",
"13",
"27",
"22",
"57",
"43",
"76",
"23",
"81",
"28",
"97",
"45",
"74",
"56",
"7",
"40",
"26",
"62",
"65",
"2",
"15",
"48",
"21",
"5",
"17",
"91",
"9",
"55",
"19",
"11",
"3",
"96",
"52",
"99",
"20",
"68",
"18",
"53"
],
"dcoord": [
[
0.0,
27.518144480995115,
27.518144480995115,
0.0
],
[
0.0,
27.130722934022966,
27.130722934022966,
0.0
],
[
0.0,
25.29390717480382,
25.29390717480382,
0.0
],
[
0.0,
27.413400554695176,
27.413400554695176,
25.29390717480382
],
[
0.0,
25.293013118368002,
25.293013118368002,
0.0
],
[
0.0,
23.792179710369965,
23.792179710369965,
0.0
],
[
0.0,
21.642148027329426,
21.642148027329426,
0.0
],
[
0.0,
23.557558116756603,
23.557558116756603,
21.642148027329426
],
[
0.0,
23.576105300899091,
23.576105300899091,
23.557558116756603
],
[
0.0,
24.193115972058131,
24.193115972058131,
0.0
],
[
0.0,
24.293275116073026,
24.293275116073026,
0.0
],
[
0.0,
24.116070929447591,
24.116070929447591,
0.0
],
[
0.0,
24.405210501109874,
24.405210501109874,
24.116070929447591
],
[
24.293275116073026,
24.715630014374756,
24.715630014374756,
24.405210501109874
],
[
24.193115972058131,
24.953184532786896,
24.953184532786896,
24.715630014374756
],
[
0.0,
23.608271686271824,
23.608271686271824,
0.0
],
[
0.0,
23.728867544577454,
23.728867544577454,
23.608271686271824
],
[
0.0,
19.45993128599261,
19.45993128599261,
0.0
],
[
0.0,
23.757387174554655,
23.757387174554655,
19.45993128599261
],
[
0.0,
23.992555061068995,
23.992555061068995,
23.757387174554655
],
[
0.0,
20.982586650828207,
20.982586650828207,
0.0
],
[
0.0,
21.446399428227483,
21.446399428227483,
0.0
],
[
0.0,
21.717612015031946,
21.717612015031946,
21.446399428227483
],
[
0.0,
22.861730799321712,
22.861730799321712,
21.717612015031946
],
[
20.982586650828207,
22.985605750972798,
22.985605750972798,
22.861730799321712
],
[
0.0,
24.002218208777489,
24.002218208777489,
22.985605750972798
],
[
23.992555061068995,
24.398753691690793,
24.398753691690793,
24.002218208777489
],
[
0.0,
20.686156643048491,
20.686156643048491,
0.0
],
[
0.0,
23.685004147009344,
23.685004147009344,
20.686156643048491
],
[
0.0,
23.793732585817956,
23.793732585817956,
23.685004147009344
],
[
0.0,
24.128861717277111,
24.128861717277111,
23.793732585817956
],
[
0.0,
24.498526466235624,
24.498526466235624,
24.128861717277111
],
[
24.398753691690793,
24.514347682406012,
24.514347682406012,
24.498526466235624
],
[
0.0,
24.630362805196452,
24.630362805196452,
24.514347682406012
],
[
0.0,
25.155905026757313,
25.155905026757313,
24.630362805196452
],
[
23.728867544577454,
25.15647802399387,
25.15647802399387,
25.155905026757313
],
[
24.953184532786896,
25.172552483013433,
25.172552483013433,
25.15647802399387
],
[
0.0,
25.20417405616141,
25.20417405616141,
25.172552483013433
],
[
0.0,
25.220582192806511,
25.220582192806511,
25.20417405616141
],
[
0.0,
25.389559824585536,
25.389559824585536,
25.220582192806511
],
[
0.0,
25.64110842421951,
25.64110842421951,
25.389559824585536
],
[
23.576105300899091,
25.690106685653056,
25.690106685653056,
25.64110842421951
],
[
0.0,
25.741180813363346,
25.741180813363346,
25.690106685653056
],
[
0.0,
25.785115566546956,
25.785115566546956,
25.741180813363346
],
[
23.792179710369965,
25.836092336471886,
25.836092336471886,
25.785115566546956
],
[
0.0,
25.977853175545881,
25.977853175545881,
25.836092336471886
],
[
0.0,
26.039172751469021,
26.039172751469021,
25.977853175545881
],
[
0.0,
26.214361680070962,
26.214361680070962,
26.039172751469021
],
[
0.0,
26.220286759894904,
26.220286759894904,
26.214361680070962
],
[
0.0,
26.313299377256058,
26.313299377256058,
26.220286759894904
],
[
0.0,
26.580925879205548,
26.580925879205548,
26.313299377256058
],
[
0.0,
26.722057155713269,
26.722057155713269,
26.580925879205548
],
[
0.0,
26.773048994507231,
26.773048994507231,
26.722057155713269
],
[
25.293013118368002,
26.843624326874014,
26.843624326874014,
26.773048994507231
],
[
0.0,
27.034800150593306,
27.034800150593306,
26.843624326874014
],
[
0.0,
27.038132136258341,
27.038132136258341,
27.034800150593306
],
[
0.0,
27.043594369695519,
27.043594369695519,
27.038132136258341
],
[
0.0,
27.072572774219154,
27.072572774219154,
27.043594369695519
],
[
0.0,
27.081812907879932,
27.081812907879932,
27.072572774219154
],
[
0.0,
27.189432870985865,
27.189432870985865,
27.081812907879932
],
[
0.0,
27.256279237628505,
27.256279237628505,
27.189432870985865
],
[
0.0,
27.429933367865605,
27.429933367865605,
27.256279237628505
],
[
0.0,
27.554928855086274,
27.554928855086274,
27.429933367865605
],
[
0.0,
27.677430716668201,
27.677430716668201,
27.554928855086274
],
[
0.0,
27.92402983569087,
27.92402983569087,
27.677430716668201
],
[
0.0,
27.953824070897817,
27.953824070897817,
27.92402983569087
],
[
0.0,
28.065471593428093,
28.065471593428093,
27.953824070897817
],
[
0.0,
28.192957887377265,
28.192957887377265,
28.065471593428093
],
[
0.0,
28.439469902846735,
28.439469902846735,
28.192957887377265
],
[
0.0,
28.446578588989386,
28.446578588989386,
28.439469902846735
],
[
27.413400554695176,
28.543119545427814,
28.543119545427814,
28.446578588989386
],
[
0.0,
28.760168050474121,
28.760168050474121,
28.543119545427814
],
[
27.130722934022966,
28.846531500628949,
28.846531500628949,
28.760168050474121
],
[
0.0,
28.914750798203293,
28.914750798203293,
28.846531500628949
],
[
0.0,
29.053566373210469,
29.053566373210469,
28.914750798203293
],
[
0.0,
29.342937764994055,
29.342937764994055,
29.053566373210469
],
[
0.0,
29.460637698118102,
29.460637698118102,
29.342937764994055
],
[
0.0,
29.861509550922673,
29.861509550922673,
29.460637698118102
],
[
0.0,
30.035620166466973,
30.035620166466973,
29.861509550922673
],
[
0.0,
30.035700370427925,
30.035700370427925,
30.035620166466973
],
[
27.518144480995115,
30.036451385299898,
30.036451385299898,
30.035700370427925
],
[
0.0,
30.993746370583757,
30.993746370583757,
30.036451385299898
],
[
0.0,
31.390693929461875,
31.390693929461875,
30.993746370583757
],
[
0.0,
31.975461478475623,
31.975461478475623,
31.390693929461875
],
[
0.0,
32.491475164122512,
32.491475164122512,
31.975461478475623
],
[
0.0,
33.676027856995724,
33.676027856995724,
32.491475164122512
],
[
0.0,
34.050456666218182,
34.050456666218182,
33.676027856995724
],
[
0.0,
35.79875728433219,
35.79875728433219,
34.050456666218182
],
[
0.0,
36.00321388342384,
36.00321388342384,
35.79875728433219
],
[
0.0,
36.711188750976206,
36.711188750976206,
36.00321388342384
],
[
0.0,
36.751516582309478,
36.751516582309478,
36.711188750976206
],
[
0.0,
37.517628009874215,
37.517628009874215,
36.751516582309478
],
[
0.0,
37.925627428739297,
37.925627428739297,
37.517628009874215
],
[
0.0,
38.882682924998058,
38.882682924998058,
37.925627428739297
],
[
0.0,
38.923469813407898,
38.923469813407898,
38.882682924998058
],
[
0.0,
39.690473782136678,
39.690473782136678,
38.923469813407898
],
[
0.0,
40.330392131905356,
40.330392131905356,
39.690473782136678
],
[
0.0,
42.82006360709731,
42.82006360709731,
40.330392131905356
],
[
0.0,
46.270864821597307,
46.270864821597307,
42.82006360709731
]
],
"leaves": [
87,
82,
86,
79,
31,
34,
94,
83,
80,
93,
47,
89,
85,
35,
10,
84,
38,
90,
25,
66,
70,
16,
88,
6,
61,
51,
29,
50,
78,
0,
24,
41,
54,
95,
59,
14,
33,
71,
69,
49,
46,
37,
64,
44,
8,
73,
58,
30,
39,
32,
98,
72,
42,
77,
36,
67,
92,
1,
63,
12,
60,
4,
75,
13,
27,
22,
57,
43,
76,
23,
81,
28,
97,
45,
74,
56,
7,
40,
26,
62,
65,
2,
15,
48,
21,
5,
17,
91,
9,
55,
19,
11,
3,
96,
52,
99,
20,
68,
18,
53
],
"color_list": [
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k",
"k"
],
"icoord": [
[
185.0,
185.0,
195.0,
195.0
],
[
275.0,
275.0,
285.0,
285.0
],
[
315.0,
315.0,
325.0,
325.0
],
[
305.0,
305.0,
320.0,
320.0
],
[
495.0,
495.0,
505.0,
505.0
],
[
595.0,
595.0,
605.0,
605.0
],
[
655.0,
655.0,
665.0,
665.0
],
[
645.0,
645.0,
660.0,
660.0
],
[
635.0,
635.0,
652.5,
652.5
],
[
715.0,
715.0,
725.0,
725.0
],
[
735.0,
735.0,
745.0,
745.0
],
[
765.0,
765.0,
775.0,
775.0
],
[
755.0,
755.0,
770.0,
770.0
],
[
740.0,
740.0,
762.5,
762.5
],
[
720.0,
720.0,
751.25,
751.25
],
[
795.0,
795.0,
805.0,
805.0
],
[
785.0,
785.0,
800.0,
800.0
],
[
855.0,
855.0,
865.0,
865.0
],
[
845.0,
845.0,
860.0,
860.0
],
[
835.0,
835.0,
852.5,
852.5
],
[
885.0,
885.0,
895.0,
895.0
],
[
925.0,
925.0,
935.0,
935.0
],
[
915.0,
915.0,
930.0,
930.0
],
[
905.0,
905.0,
922.5,
922.5
],
[
890.0,
890.0,
913.75,
913.75
],
[
875.0,
875.0,
901.875,
901.875
],
[
843.75,
843.75,
888.4375,
888.4375
],
[
985.0,
985.0,
995.0,
995.0
],
[
975.0,
975.0,
990.0,
990.0
],
[
965.0,
965.0,
982.5,
982.5
],
[
955.0,
955.0,
973.75,
973.75
],
[
945.0,
945.0,
964.375,
964.375
],
[
866.09375,
866.09375,
954.6875,
954.6875
],
[
825.0,
825.0,
910.390625,
910.390625
],
[
815.0,
815.0,
867.6953125,
867.6953125
],
[
792.5,
792.5,
841.34765625,
841.34765625
],
[
735.625,
735.625,
816.923828125,
816.923828125
],
[
705.0,
705.0,
776.2744140625,
776.2744140625
],
[
695.0,
695.0,
740.63720703125,
740.63720703125
],
[
685.0,
685.0,
717.818603515625,
717.818603515625
],
[
675.0,
675.0,
701.4093017578125,
701.4093017578125
],
[
643.75,
643.75,
688.2046508789062,
688.2046508789062
],
[
625.0,
625.0,
665.9773254394531,
665.9773254394531
],
[
615.0,
615.0,
645.4886627197266,
645.4886627197266
],
[
600.0,
600.0,
630.2443313598633,
630.2443313598633
],
[
585.0,
585.0,
615.1221656799316,
615.1221656799316
],
[
575.0,
575.0,
600.0610828399658,
600.0610828399658
],
[
565.0,
565.0,
587.5305414199829,
587.5305414199829
],
[
555.0,
555.0,
576.2652707099915,
576.2652707099915
],
[
545.0,
545.0,
565.6326353549957,
565.6326353549957
],
[
535.0,
535.0,
555.3163176774979,
555.3163176774979
],
[
525.0,
525.0,
545.1581588387489,
545.1581588387489
],
[
515.0,
515.0,
535.0790794193745,
535.0790794193745
],
[
500.0,
500.0,
525.0395397096872,
525.0395397096872
],
[
485.0,
485.0,
512.5197698548436,
512.5197698548436
],
[
475.0,
475.0,
498.7598849274218,
498.7598849274218
],
[
465.0,
465.0,
486.8799424637109,
486.8799424637109
],
[
455.0,
455.0,
475.93997123185545,
475.93997123185545
],
[
445.0,
445.0,
465.4699856159277,
465.4699856159277
],
[
435.0,
435.0,
455.23499280796386,
455.23499280796386
],
[
425.0,
425.0,
445.11749640398193,
445.11749640398193
],
[
415.0,
415.0,
435.05874820199097,
435.05874820199097
],
[
405.0,
405.0,
425.0293741009955,
425.0293741009955
],
[
395.0,
395.0,
415.01468705049774,
415.01468705049774
],
[
385.0,
385.0,
405.00734352524887,
405.00734352524887
],
[
375.0,
375.0,
395.00367176262444,
395.00367176262444
],
[
365.0,
365.0,
385.0018358813122,
385.0018358813122
],
[
355.0,
355.0,
375.0009179406561,
375.0009179406561
],
[
345.0,
345.0,
365.00045897032805,
365.00045897032805
],
[
335.0,
335.0,
355.000229485164,
355.000229485164
],
[
312.5,
312.5,
345.000114742582,
345.000114742582
],
[
295.0,
295.0,
328.750057371291,
328.750057371291
],
[
280.0,
280.0,
311.8750286856455,
311.8750286856455
],
[
265.0,
265.0,
295.93751434282274,
295.93751434282274
],
[
255.0,
255.0,
280.46875717141137,
280.46875717141137
],
[
245.0,
245.0,
267.7343785857057,
267.7343785857057
],
[
235.0,
235.0,
256.36718929285286,
256.36718929285286
],
[
225.0,
225.0,
245.68359464642643,
245.68359464642643
],
[
215.0,
215.0,
235.3417973232132,
235.3417973232132
],
[
205.0,
205.0,
225.1708986616066,
225.1708986616066
],
[
190.0,
190.0,
215.0854493308033,
215.0854493308033
],
[
175.0,
175.0,
202.54272466540164,
202.54272466540164
],
[
165.0,
165.0,
188.77136233270082,
188.77136233270082
],
[
155.0,
155.0,
176.88568116635042,
176.88568116635042
],
[
145.0,
145.0,
165.9428405831752,
165.9428405831752
],
[
135.0,
135.0,
155.4714202915876,
155.4714202915876
],
[
125.0,
125.0,
145.2357101457938,
145.2357101457938
],
[
115.0,
115.0,
135.1178550728969,
135.1178550728969
],
[
105.0,
105.0,
125.05892753644845,
125.05892753644845
],
[
95.0,
95.0,
115.02946376822422,
115.02946376822422
],
[
85.0,
85.0,
105.01473188411211,
105.01473188411211
],
[
75.0,
75.0,
95.00736594205605,
95.00736594205605
],
[
65.0,
65.0,
85.00368297102803,
85.00368297102803
],
[
55.0,
55.0,
75.001841485514,
75.001841485514
],
[
45.0,
45.0,
65.000920742757,
65.000920742757
],
[
35.0,
35.0,
55.0004603713785,
55.0004603713785
],
[
25.0,
25.0,
45.00023018568925,
45.00023018568925
],
[
15.0,
15.0,
35.000115092844624,
35.000115092844624
],
[
5.0,
5.0,
25.000057546422312,
25.000057546422312
]
]
}
In [9]:
from bokeh.plotting import figure, show

# Draw every dendrogram link segment as a black line on a 600x600 figure.
# The y_val_* lists are passed as x coordinates and the x_val_* lists as y
# coordinates, i.e. the axes are swapped relative to how they were computed.
plot = figure(width=600, height=600)
plot.segment(
    x0=y_val_0, y0=x_val_0,
    x1=y_val_1, y1=x_val_1,
    line_width=1,
    color="#000000",
)
show(plot)
Working with cross-correlation matrices; further refinement
In [10]:
cross_corr_matrix = "data/ENCDRTR_dex1hr_observed_nuTSS.GR.5bin_matrix_spearman.csv"

# Collect the labels by hand: column names come from the first line, row
# names from the first field of every following line.
row_names = []
col_names = []
# `with` guarantees the handle is closed even if parsing raises.
with open(cross_corr_matrix) as f:
    header = next(f).strip().split(",")
    col_names = header[1:]  # first header field sits above the row-name column
    for line in f:
        if not line.strip():
            # Ignore blank lines (e.g. a trailing newline at end of file) so
            # they do not become bogus row names.
            continue
        row_names.append(line.split(",")[0])

# Take the column count from the header so it is defined even when the file
# has no data rows and cannot be skewed by a blank last line.
col_num = len(header)

# Load only the numeric part: skip the header line and the row-name column.
corr_array = np.loadtxt(
    cross_corr_matrix,
    skiprows=1,
    usecols=range(1, col_num),
    delimiter=",",
)
In [11]:
# Cluster the matrix loaded with np.loadtxt. The method and metric are written
# out explicitly; they are the values scipy uses when none are given.
# NOTE(review): assumes corr_array is 2-D, in which case scipy treats each row
# as an observation vector rather than as precomputed distances - confirm.
corr_lnk = linkage(corr_array, method="single", metric="euclidean")

# Draw the dendrogram and keep its plot-coordinate dictionary.
corr_dg = dendrogram(corr_lnk)
In [12]:
import pandas as pd

# Load the same CSV with pandas; the first column becomes the row index.
# The separator is written out explicitly and matches pandas' default.
corr_pd = pd.read_csv(cross_corr_matrix, sep=",", index_col=0)
In [13]:
# Cluster the numeric contents of the DataFrame.
# `.values` replaces `DataFrame.as_matrix()`, which is deprecated and was
# removed in pandas 1.0; `.values` returns the same NumPy array and is
# available in old and new pandas releases alike.
corr_pd_lnk = linkage(corr_pd.values)

# Draw the dendrogram and keep its plot-coordinate dictionary.
corr_pd_dg = dendrogram(corr_pd_lnk)
In [14]:
# Segment endpoints for the dendrogram of the pandas-loaded matrix.
# *_0 lists hold segment starts, *_1 lists hold the matching segment ends.
x_val_0 = []
x_val_1 = []
y_val_0 = []
y_val_1 = []

# Every coordinate is subtracted from the largest value on its axis, which
# mirrors the dendrogram along both axes.
max_x = np.amax(corr_pd_dg['icoord'])
max_y = np.amax(corr_pd_dg['dcoord'])

# Iterate over the links of corr_pd_dg itself. The loop bound previously came
# from the unrelated `dg` dendrogram, which raises IndexError or silently
# drops links whenever the two dendrograms have a different number of links.
for link_x, link_y in zip(corr_pd_dg['icoord'], corr_pd_dg['dcoord']):
    # Each link is four points; consecutive points form its three segments.
    for j in [0, 1, 2]:
        x_val_0.append(max_x - link_x[j])
        y_val_0.append(max_y - link_y[j])
    for j in [1, 2, 3]:
        x_val_1.append(max_x - link_x[j])
        y_val_1.append(max_y - link_y[j])
In [15]:
# Draw every link segment as a black line on a fresh 600x600 figure.
# The y_val_* lists are passed as x coordinates and the x_val_* lists as y
# coordinates, i.e. the axes are swapped relative to how they were computed.
plot = figure(width=600, height=600)
plot.segment(
    x0=y_val_0, y0=x_val_0,
    x1=y_val_1, y1=x_val_1,
    line_width=1,
    color="#000000",
)
show(plot)
Content source: lavenderca/encoderator
Similar notebooks: