In [1]:
cd executible/
In [2]:
%run Cu_transition_functionalized.py
In [3]:
import hdbscan
import time
from sklearn import metrics
In [4]:
# Load raw counts and build the derived tables used throughout this notebook:
# TPM-normalized counts, log2 and mean-centered variants, and distance matrices.
df1_raw_FM40 = raw_data_cleanup("5G_counts.tsv")
columns = ['5GB1_FM40_T0m_TR2', '5GB1_FM40_T10m_TR3', '5GB1_FM40_T20m_TR2', '5GB1_FM40_T40m_TR1',
'5GB1_FM40_T60m_TR1', '5GB1_FM40_T90m_TR2', '5GB1_FM40_T150m_TR1_remake', '5GB1_FM40_T180m_TR1']
df2_TPM = TPM_counts(df1_raw_FM40, "start_coord", "end_coord",columns, remove_zero = True) #TPM counts
df2_TPM_log2 = log_2_transform(df2_TPM, "5GB1_FM40_T0m_TR2","5GB1_FM40_T180m_TR1") #TPM log 2 transformed
df2_TPM_mean = mean_center(df2_TPM, "5GB1_FM40_T0m_TR2","5GB1_FM40_T180m_TR1") #TPM mean centered
df3_pearson_r = congruency_table(df2_TPM, "5GB1_FM40_T0m_TR2" , "5GB1_FM40_T180m_TR1", step = df2_TPM.shape[0])
df3_euclidean_mean = euclidean_distance(df2_TPM_mean, "5GB1_FM40_T0m_TR2" , "5GB1_FM40_T180m_TR1")
# BUG FIX: this previously passed df2_TPM_mean (copy-paste of the line above), so the
# "log2" distance matrix was identical to the mean-centered one. Use the log2 table.
df3_euclidean_log2 = euclidean_distance(df2_TPM_log2, "5GB1_FM40_T0m_TR2" , "5GB1_FM40_T180m_TR1" )
print("The shape of the TPM table is ", df2_TPM.shape)
print("The shape of the pearson_r matrix is ", df3_pearson_r.shape)
In [5]:
# HDBSCAN over the precomputed Pearson-r distance matrix (rows with N/A values removed),
# timing the fit so expensive cells are visible to the reader.
hdb_start = time.time()
pearson_clusterer = hdbscan.HDBSCAN(metric = "precomputed", min_cluster_size=10)
hdb_pearson_r = pearson_clusterer.fit(df3_pearson_r)
hdb_pearson_r_labels = hdb_pearson_r.labels_
hdb_elapsed_time = time.time() - hdb_start
print("time to cluster", hdb_elapsed_time)
In [6]:
print(np.unique(hdb_pearson_r_labels)) # unique cluster labels; -1 marks noise points (not a cluster)
print(np.bincount(hdb_pearson_r_labels[hdb_pearson_r_labels!=-1])) # members per cluster, noise (-1) excluded
In [24]:
pearson_clusters = {i: np.where(hdb_pearson_r_labels == i)[0] for i in range(2)}
In [23]:
pearson_clusters
Out[23]:
In [25]:
#pd.set_option('display.height', 500) #These two commands allow for the display of max of 500 rows - exploring genes
#pd.set_option('display.max_rows', 500)
# Show the TPM rows that fell into pearson cluster 1 (cluster ids here are 0 and 1)
df2_TPM.iloc[pearson_clusters[1],:] #the genes that were clustered together [0,1]
Out[25]:
Looks like there are two clusters: one with some expression and one with zero expression across samples.
In [27]:
df3_euclidean_mean.hist()
Out[27]:
In [10]:
# HDBSCAN over the precomputed euclidean distance matrix of the
# mean-centered TPM counts, with timing.
hdb_start = time.time()
euclid_mean_clusterer = hdbscan.HDBSCAN(metric = "precomputed", min_cluster_size=10)
hdb_euclidean_mean = euclid_mean_clusterer.fit(df3_euclidean_mean)
hdb_euclidean_mean_labels = hdb_euclidean_mean.labels_
hdb_elapsed_time = time.time() - hdb_start
print("time to cluster", hdb_elapsed_time)
In [11]:
print(np.unique(hdb_euclidean_mean_labels)) # unique cluster labels; -1 marks noise points
print(np.bincount(hdb_euclidean_mean_labels[hdb_euclidean_mean_labels!=-1])) # members per cluster, noise excluded
In [12]:
# Map cluster label -> row indices; generalized to however many clusters were
# found (labels are 0..k-1, -1 is noise) instead of a hard-coded 2.
euclidean_mean_clusters = {i: np.where(hdb_euclidean_mean_labels == i)[0]
                           for i in range(hdb_euclidean_mean_labels.max() + 1)}
df2_TPM.iloc[euclidean_mean_clusters[1],:] # TPM rows belonging to cluster 1
Out[12]:
Looks like 2 clusters - both with zero expression.
Looks like whether the input is a numpy array or a pandas DataFrame, the result is the same. Let's now try to get the indices of the clustered points.
In [28]:
df3_euclidean_log2
Out[28]:
In [13]:
# Clustering the log2 transformed euclidean distance of TPM counts
hdb_t1 = time.time()
hdb_euclidean_log2 = hdbscan.HDBSCAN(metric = "precomputed", min_cluster_size=10).fit(df3_euclidean_log2)
hdb_euclidean_log2_labels = hdb_euclidean_log2.labels_
hdb_elapsed_time = time.time() - hdb_t1
print("time to cluster", hdb_elapsed_time)
In [14]:
print(np.unique(hdb_euclidean_log2_labels)) # unique cluster labels; -1 marks noise points
print(np.bincount(hdb_euclidean_log2_labels[hdb_euclidean_log2_labels!=-1])) # members per cluster, noise excluded
In [15]:
# Map cluster label -> row indices; generalized to the number of clusters
# actually found (labels are 0..k-1, -1 is noise) instead of a hard-coded 2.
euclidean_log2_clusters = {i: np.where(hdb_euclidean_log2_labels == i)[0]
                           for i in range(hdb_euclidean_log2_labels.max() + 1)}
df2_TPM.iloc[euclidean_log2_clusters[1],:] # TPM rows belonging to cluster 1
Out[15]:
In [16]:
df2_TPM_values = df2_TPM.loc[:,"5GB1_FM40_T0m_TR2":"5GB1_FM40_T180m_TR1"] #isolating the data value columns
df2_TPM_values_T = df2_TPM_values.T #transposing so each gene becomes a column (scaler works per column)
standard_scaler = StandardScaler()
# NOTE: StandardScaler mean-centers AND scales each gene to unit variance —
# despite the variable name, this is standardization, not just mean centering.
TPM_counts_mean_centered = standard_scaler.fit_transform(df2_TPM_values_T)
TPM_counts_mean_centered = pd.DataFrame(TPM_counts_mean_centered) #back to DataFrame
#transposing back to the original orientation and reinserting index and columns
my_index = df2_TPM_values.index
my_columns = df2_TPM_values.columns
TPM_counts_mean_centered = TPM_counts_mean_centered.T
TPM_counts_mean_centered.set_index(my_index, inplace=True)
TPM_counts_mean_centered.columns = my_columns
In [17]:
# HDBSCAN directly on the standardized TPM counts using the euclidean metric
# (previous header comment wrongly said "pearsons_R" — this cell is euclidean).
hdb_start = time.time()
standardized_clusterer = hdbscan.HDBSCAN(metric = "euclidean", min_cluster_size=5)
hdb_euclidean = standardized_clusterer.fit(TPM_counts_mean_centered)
hdb_euclidean_labels = hdb_euclidean.labels_
hdb_elapsed_time = time.time() - hdb_start
print("time to cluster", hdb_elapsed_time)
In [18]:
print(np.unique(hdb_euclidean_labels)) # unique cluster labels; -1 marks noise points
print(np.bincount(hdb_euclidean_labels[hdb_euclidean_labels!=-1])) # members per cluster, noise excluded
In [30]:
# Map cluster label -> row indices; generalized to the number of clusters found
# (labels are 0..k-1, -1 is noise) instead of the hard-coded 7.
Euclidean_standard_scaled_clusters = {i: np.where(hdb_euclidean_labels == i)[0]
                                      for i in range(hdb_euclidean_labels.max() + 1)}
df2_TPM.iloc[Euclidean_standard_scaled_clusters[1],:] # TPM rows belonging to cluster 1
Out[30]:
In [19]:
# Standardize the log2-transformed TPM counts per gene.
df2_TPM_log2_scale= df2_TPM_log2.T #transposing so each gene becomes a column (scaler works per column)
standard_scaler = StandardScaler()
# NOTE: StandardScaler mean-centers AND scales to unit variance (standardization).
TPM_log2_mean_scaled = standard_scaler.fit_transform(df2_TPM_log2_scale)
TPM_log2_mean_scaled = pd.DataFrame(TPM_log2_mean_scaled) #back to DataFrame
# Transpose back to the original orientation and reinsert index and columns.
# FIX: take index/columns from df2_TPM_log2 itself rather than df2_TPM_values,
# so the labels stay correct even if the two tables ever diverge in row order.
my_index = df2_TPM_log2.index
my_columns = df2_TPM_log2.columns
TPM_log2_mean_scaled = TPM_log2_mean_scaled.T
TPM_log2_mean_scaled.set_index(my_index, inplace=True)
TPM_log2_mean_scaled.columns = my_columns
In [20]:
# HDBSCAN on the standardized log2 TPM counts using the euclidean metric
# (previous header comment wrongly said "pearsons_R").
hdb_t1 = time.time()
hdb_log2_clusterer = hdbscan.HDBSCAN(metric = "euclidean", min_cluster_size=5).fit(TPM_log2_mean_scaled)
# FIX: previously the fitted clusterer was immediately overwritten by its own
# labels_ array (name reuse), losing the model object. Keep the clusterer and
# assign the labels to the same name downstream cells already read.
hdb_log2_euclidean = hdb_log2_clusterer.labels_
hdb_elapsed_time = time.time() - hdb_t1
print("time to cluster", hdb_elapsed_time)
In [21]:
print(np.unique(hdb_log2_euclidean)) # unique cluster labels; -1 marks noise points
print(np.bincount(hdb_log2_euclidean[hdb_log2_euclidean!=-1])) # members per cluster, noise excluded
In [ ]: