In [1]:
using MethylClust
using PyPlot
In [2]:
# Simple fit to show that the code is working
A,P,c = generate_reads(N=100,W=30);
X,Y,ch = fit_soft(A,2)
Xd,Yd,Xr = MethylClust.eval_synthetic_results(A,X,Y,c,P;make_plots=true);
In [14]:
X_real_hard = zeros(100,2)
for i = 1:100
X_real_hard[i,indmax(Xr[i,:])] = 1.0
end
figure()
imshow(X_real_hard',cmap=ColorMap("Greys"),interpolation="none")
xlabel("true X (hard clustering)")
yticks([]),xticks([])
Out[14]:
In [4]:
Nlevs = [10]#[10:20:90]
Wlevs = [10]#[10:5:30]
nr = 3 # number of replicates
L = 100 # genome length
# Storage for X and Y errors/distance
Xd = zeros(length(Nlevs),length(Wlevs),nr)
Yd = zeros(length(Nlevs),length(Wlevs),nr)
for n = 1:length(Nlevs)
N = Nlevs[n]
for w = 1:length(Wlevs)
W = Wlevs[w]
println("Fitting with N = ",N," reads of length W = ",W,".")
for r = 1:nr
print("*")
A,P,c = generate_reads(N=N, W=W, L=L, Cprob=[0.5,0.5]);
X,Y,ch = fit_soft(A,2)
Xd[n,w,r],Yd[n,w,r] = MethylClust.eval_synthetic_results(X,Y,c,P;make_plots=false)
end
println()
end
end
In [5]:
Xdm = squeeze(mean(Xd,3),3)
imshow(Xdm,cmap=ColorMap("Greys"),interpolation="None")
xticks(0:length(Wlevs)-1,Wlevs),yticks(0:length(Nlevs)-1,Nlevs)
xlabel("Read Length"),ylabel("Number of Reads"),title("Xdist, Num Sites = "*string(L))
colorbar(label="proportion misclassified")
show()
In [6]:
N = repmat(Nlevs,1,length(Wlevs))
W = repmat(Wlevs',length(Nlevs),1)
figure()
scatter(vec(N).*vec(W)./L,vec(Xdm),c=vec(W),s=40,cmap=ColorMap("Reds_r"))
colorbar(ticks=[10,30],label="sites per read")
ylabel("proportion misclassified"), xlabel("coverage")
show()
In [7]:
Ydm = squeeze(mean(Yd,3),3)
imshow(Ydm,cmap=ColorMap("Greys"),interpolation="None")
xticks(0:length(Wlevs)-1,Wlevs),yticks(0:length(Nlevs)-1,Nlevs)
xlabel("Read Length"),ylabel("Number of Reads"),title("Ydist, Num Sites = "*string(L))
show()
In [8]:
figure()
scatter(vec(N).*vec(W)./L,vec(Ydm),c=vec(W),s=40,cmap=ColorMap("Reds_r"))
colorbar(ticks=[10,30],label="sites per read")
ylabel("reconstruction error"), xlabel("coverage")
show()
In [14]:
d = (Float64)[]
Xd = (Float64)[]
Yd = (Float64)[]
nr = 20
for dist = 0.5:0.5:5.0
println("Fitting with methylation profile approx dist = ",dist)
for _ = 1:nr
A,P,c = generate_reads(N=100, W=30, L=100, Cprob=[0.5,0.5], targ_dist=dist);
X,Y,ch = fit_soft(A,2)
xd,yd = MethylClust.eval_synthetic_results(X,Y,c,P;make_plots=false)
push!(d,MethylClust.d_js_multivar_bern(vec(P[1,:]),vec(P[2,:])))
push!(Xd,xd)
push!(Yd,yd)
print("*")
end
println()
end
In [17]:
figure()
plot(d,Xd,".k")
xlabel("Jensen-Shannon Distance Between Cell Types")
ylabel("Proportion Misclassified")
savefig("./figs/02_05.svg")
In [18]:
figure()
plot(d,Yd,".k")
xlabel("Jensen-Shannon Distance Between Cell Types")
ylabel("Reconstruction Error")
savefig("./figs/02_06.svg")
In [ ]: