In [1]:
# Libraries
from pytadbit import load_chromosome # load chromosome
from pytadbit.imp.CONFIG import CONFIG # Pre-defined parameters for modeling
from pytadbit.imp.structuralmodels import load_structuralmodels # library for modeling.
# Load the chromosome
my_chrom = load_chromosome('some_path.tdb')
In [2]:
# Loop over experiments in chromosome and load Hi-C data.
res = 100000
for exp in my_chrom.experiments:
try:
exp.load_hic_data('../../scripts/sample_data/HIC_{0}_{1}_{1}_{2}_obs.txt'.format(
exp.name, my_chrom.name, res))
except:
print 'file not found for experiment: ' + exp.name
continue
# Load Hi-C of the individual experiments and put it into the sum experiment BR+TR1+TR2
my_chrom.experiments['k562+gm06690'].load_hic_data(
(my_chrom.experiments['k562'] + my_chrom.experiments['gm06690']).hic_data,
'k562+gm06690')
exp = my_chrom.experiments['gm06690']
exp.normalize_hic()
exp.filter_columns()
print my_chrom.experiments
In [3]:
# See pre-defined sets of parameters
CONFIG
Out[3]:
In [4]:
# Set of optimal parameters from pervious tutorial #2
optpar = {'kforce': 5,
'lowfreq': 0.0,
'lowrdist': 100,
'upfreq': 0.8,
'maxdist': 2750,
'scale': 0.01,
'reference': 'gm cell from Job Dekker 2009'}
# Build 3D models based on the HiC data. This is done by IMP.
models = exp.model_region(100, 200, n_models=500, n_keep=100, n_cpus=8, keep_all=True, config=optpar)
print models
In [5]:
models.experiment
Out[5]:
In [6]:
# Select top 10 models
models.define_best_models(10)
print "Lowest 10 IMP OF models:"
print models
# Select top 100 models
models.define_best_models(100)
print "Lowest 100 IMP OF models:"
print models
# Get the data for the lowest IMP OF model (number 0) in the set of models
model = models[0]
print model
In [7]:
# Get the IMP OF of the stored model in "model"
model.objective_function(log=True, smooth=False)
In [8]:
# Re-select again the top 1000 models
models.define_best_models(100)
# Calculate the correlation coefficient between a set of kept models and the original HiC matrix
models.correlate_with_real_data(plot=True, cutoff=1000)
Out[8]:
In [9]:
models.zscore_plot()
In [10]:
models.align_models(in_place=True)
models.deconvolve(fact=0.6, dcutoff=1000, represent_models='best', n_best_clusters=5)
In [11]:
# Cluster models based on structural similarity
models.cluster_models(fact=0.95, dcutoff=2000)
print models.clusters
In [12]:
# Plot the resulting clusers
cl = models.cluster_analysis_dendrogram(color=True)
In [13]:
# Show de dendogram for only the 5 top clusters and no colors
cl = models.cluster_analysis_dendrogram(n_best_clusters=5)
In [14]:
# Calculate the consistency plot for all models in the first cluster (cluster 0)
models.model_consistency(cluster=1, cutoffs=(1000,2000))
In [15]:
# Calculate a DNA density plot
models.density_plot()
In [16]:
# Get a similar plot for only the top cluster and show the standar deviation for a specific(s) running window (steps)
models.density_plot(cluster=1,error=True, steps=(5))
In [17]:
models.walking_angle(steps=(3, 5, 7), signed=False)
In [18]:
models.interactions(cutoff=2000)
In [19]:
# Get a contact map for the top 50 models at a distance cut-off of 300nm
models.contact_map(models=range(5,10), cutoff=1200, savedata="contact.txt")
In [20]:
# Correlate the contact map with the original input HiC matrix for cluster 1
models.correlate_with_real_data(cluster=1, plot=True, cutoff=1500)
# Correlate the contact map with the original input HiC matrix for cluster 2
models.correlate_with_real_data(cluster=2, plot=True, cutoff=1500)
# Correlate the contact map with the original input HiC matrix for cluster 10
models.correlate_with_real_data(cluster=10, plot=True, cutoff=1500)
Out[20]:
In [21]:
# Get the average distance between particles 13 and 30 in all kept models
models.median_3d_dist(13, 20, plot=False)
Out[21]:
In [22]:
# Plot the distance distributions between particles 13 and 30 in all kept models
models.median_3d_dist(15, 20, plot=True)
In [23]:
# Plot the distance distributions between particles 13 and 30 in the top 100 models
models.median_3d_dist(13, 30, models=range(100))
In [24]:
# Plot the distance distributions between particles 13 and 30 in the models from cluster 0
models.median_3d_dist(0, 54, plot=True, cluster=1)
In [25]:
# Save your entire analysis and models
models.save_models('gm_01.models')
In [26]:
# Load the models
loaded_models = load_structuralmodels('gm_01.models')
print loaded_models
In [27]:
# Write a CMM file for the top model
models.write_cmm(directory="./", model_num=0)
# Write CMM ofcentroid model
models.write_cmm(directory="./", model_num=models.centroid_model(cluster=1))
# Write a XYZ file for the top model
models.write_xyz(directory="./", model_num=0)
# Write a XYZ file for the top 10 models
models.write_xyz(directory="./", models=range(10))
# Write a XYZ file for the cluster 1 models
models.write_xyz(directory="./", cluster=1)
In [28]:
models.view_models(stress='centroid', tool='plot', figsize=(10,10), azimuth=-60, elevation=100)
Note that we represent with colors, only 1 model, the centromere. An easy way to get the centromere model is using the get_centromere funtion:
As models are linked to experiments, we can paint TADs in this visualization:
In [29]:
models.view_models(highlight='centroid', show='highlighted', tool='plot', figsize=(10,10), azimuth=-60, elevation=100, color='tad')
We can also color only the border, and in this view, borders are colored according to their score:
In [30]:
models.view_models(highlight='centroid', show='highlighted', tool='plot', figsize=(10,10), azimuth=-60, elevation=100, color='border')
In [31]:
print models[models.centroid_model(cluster=1)]
In [32]:
# Generate the image using Chimera in batch mode. That takes some time, wait a bit before running next command.
# You can check in your home directory whether this has finished.
models.view_models(models=[0], tool='chimera_nogui', savefig='/tmp/image_model_1.png')
In [33]:
## REMOVE
# THIS IS ONLY TO DISPLAY THE IMAGE IN iPhyton.
from IPython.display import Image
Image(filename='/tmp/image_model_1.png', width=654, height=454)
## STOP REMOVE
Out[33]:
In [34]:
# Generate the video using Chimera in batch mode. That takes SIGNIFICANT time, wait a bit before running next command.
# You can check in your home directory whether this has finished.
models.view_models(models=[0], tool='chimera_nogui', savefig='/tmp/image_model_1.webm')
In [35]:
## REMOVE
# THIS IS ONLY TO DISPLAY THE VIDEO IN iPhyton.
from IPython.display import HTML
video = open("/tmp/image_model_1.webm", "rb").read()
video_encoded = video.encode("base64")
video_tag = '''
<style media="screen" type="text/css">
#video{
width:654px;
height:454px;
}
</style>''' + '''
<video loop=1 id="video" controls="controls">
<source src="data:video/webm;base64,{0}" type="video/webm"/>
Your browser doesnt seems to support video tag
</video>'''.format(video_encoded)
HTML(video_tag)
## STOP REMOVE
Out[35]: