In [1]:
# Libraries
from pytadbit import load_chromosome # to load chromosomes
from pytadbit.imp.impoptimizer import IMPoptimizer
In [2]:
# Load the chromosome
my_chrom = load_chromosome('some_path.tdb')
In [3]:
# Loop over experiments in chromosome and load Hi-C data.
res = 100000
for exp in my_chrom.experiments:
try:
exp.load_hic_data('../../scripts/sample_data/HIC_{0}_{1}_{1}_{2}_obs.txt'.format(
exp.name, my_chrom.name, res))
exp.normalize_hic(factor=1)
except IOError:
print 'file not found for experiment: ' + exp.name
continue
print exp
In [4]:
my_chrom.experiments['k562+gm06690'] = my_chrom.experiments['k562'] + my_chrom.experiments['gm06690']
We are now going to use the experiment "gm06690" for the modelling. The first thing we need to do is to filter columns with low counts:
In [5]:
exp = my_chrom.experiments['gm06690']
exp.filter_columns(draw_hist=True)
print my_chrom.experiments
In [6]:
optimizer = IMPoptimizer(exp, 100, 200, n_models=50, n_keep=25, cutoff=2000)
In [7]:
# Optimize parameters. Be aware that this step is CPU intensive. If you want to se the progress, set verbose=True.
optimizer.run_grid_search(n_cpus=8, lowfreq_range=(-1, 0, 0.2), upfreq_range=(0.2, 0.8, 0.2),
maxdist_range=(2000, 3500, 500), verbose=True)
In [8]:
#optimizer = exp.optimal_imp_parameters(100, 200, n_cpus=8, n_models=50, n_keep=25, cutoff=1000,
# lowfreq_range=(-1, 0, 0.2), upfreq_range=(0.2, 0.8, 0.2),
# maxdist_range=(2000, 3500, 500),
# verbose=False)
In [9]:
optimizer.write_result('results.log')
In [10]:
# Visualize the results of the optimization.
optimizer.plot_2d()
In [11]:
# Visualize the results of the optimization and mark the best 10 parameter sets
optimizer.plot_2d(show_best=20)
In [12]:
axes_range = [[float(i) for i in optimizer.scale_range],
[float(i) for i in optimizer.maxdist_range],
[float(i) for i in optimizer.upfreq_range],
[float(i) for i in optimizer.lowfreq_range]]
print axes_range
[round(i, 3) for i in axes_range[3]]
result = optimizer._result_to_array()
wax = [round(i, 3) for i in axes_range[0]]
zax = [round(i, 3) for i in axes_range[1]]
xax = [round(i, 3) for i in axes_range[3]]
yax = [round(i, 3) for i in axes_range[2]]
sort_result = sorted([(result[i, j, k, l], wax[i], zax[j], xax[l], yax[k])
for i in range(len(wax))
for j in range(len(zax))
for k in range(len(yax))
for l in range(len(xax))
if not np.isnan(result[i, j, k, l])
], key=lambda x: x[0],
reverse=True)[0]
print sort_result
In [13]:
# Visualize the results of the optimization based on the lowfreq parameter.
optimizer.plot_2d(axes=('upfreq', 'lowfreq', 'maxdist', 'scale'),show_best=10)
In [14]:
optimizer.plot_2d(skip={"scale":0.01}, show_best=10)
In [15]:
# Visualize the results of the optimization using a 3D representation with the three optimization parameters in the axis.
optimizer.plot_3d(axes=('scale', 'maxdist', 'upfreq', 'lowfreq'))
In [16]:
optimizer.run_grid_search(n_cpus=8, lowfreq_range=(-1., -0.0, 0.2), upfreq_range=(0.6, 1, 0.1),
scale_range=[0.01], maxdist_range=[2000,2250,2500,2750,3000], verbose=False)
In [17]:
optimizer.plot_2d()
In [18]:
optimizer.plot_2d(show_best=100)
In [19]:
optimizer.write_result('results.log')
In [22]:
optimizer2 = IMPoptimizer(exp, 100, 200, n_models=50, n_keep=25, cutoff=2000)
In [23]:
optimizer2.load_from_file('results.log')
In [24]:
optimizer2.results.keys()[105]
Out[24]:
In [25]:
optimizer2.plot_2d(show_best=20)
In [26]:
config = optimizer.get_best_parameters_dict(reference='gm cell from Job Dekker 2009')
print config