In [1]:
    
using MethylClust
using PyPlot
    
    
In [2]:
    
# Simple fit to show that the code is working
A,P,c = generate_reads(N=100,W=30);
X,Y,ch = fit_soft(A,2)
Xd,Yd,Xr = MethylClust.eval_synthetic_results(A,X,Y,c,P;make_plots=true);
    
    
    
    
    
    
In [14]:
    
X_real_hard = zeros(100,2)
for i = 1:100
    X_real_hard[i,indmax(Xr[i,:])] = 1.0
end
figure()
imshow(X_real_hard',cmap=ColorMap("Greys"),interpolation="none")
xlabel("true X (hard clustering)")
yticks([]),xticks([])
    
    
    Out[14]:
In [4]:
    
Nlevs = [10]#[10:20:90]
Wlevs = [10]#[10:5:30]
nr = 3 # number of replicates
L = 100 # genome length
# Storage for X and Y errors/distance
Xd = zeros(length(Nlevs),length(Wlevs),nr)
Yd = zeros(length(Nlevs),length(Wlevs),nr)
for n = 1:length(Nlevs)
    N = Nlevs[n]
    for w = 1:length(Wlevs)
        W = Wlevs[w]
        println("Fitting with N = ",N," reads of length W = ",W,".")
        for r = 1:nr
            print("*")
            A,P,c = generate_reads(N=N, W=W, L=L, Cprob=[0.5,0.5]);
            X,Y,ch = fit_soft(A,2)
            Xd[n,w,r],Yd[n,w,r] = MethylClust.eval_synthetic_results(X,Y,c,P;make_plots=false)
        end
        println()
    end
end
    
    
In [5]:
    
Xdm = squeeze(mean(Xd,3),3)
imshow(Xdm,cmap=ColorMap("Greys"),interpolation="None")
xticks(0:length(Wlevs)-1,Wlevs),yticks(0:length(Nlevs)-1,Nlevs)
xlabel("Read Length"),ylabel("Number of Reads"),title("Xdist, Num Sites = "*string(L))
colorbar(label="proportion misclassified")
show()
    
    
In [6]:
    
N = repmat(Nlevs,1,length(Wlevs))
W = repmat(Wlevs',length(Nlevs),1)
figure()
scatter(vec(N).*vec(W)./L,vec(Xdm),c=vec(W),s=40,cmap=ColorMap("Reds_r"))
colorbar(ticks=[10,30],label="sites per read")
ylabel("proportion misclassified"), xlabel("coverage")
show()
    
    
In [7]:
    
Ydm = squeeze(mean(Yd,3),3)
imshow(Ydm,cmap=ColorMap("Greys"),interpolation="None")
xticks(0:length(Wlevs)-1,Wlevs),yticks(0:length(Nlevs)-1,Nlevs)
xlabel("Read Length"),ylabel("Number of Reads"),title("Ydist, Num Sites = "*string(L))
show()
    
    
In [8]:
    
figure()
scatter(vec(N).*vec(W)./L,vec(Ydm),c=vec(W),s=40,cmap=ColorMap("Reds_r"))
colorbar(ticks=[10,30],label="sites per read")
ylabel("reconstruction error"), xlabel("coverage")
show()
    
    
In [14]:
    
d = (Float64)[]
Xd = (Float64)[]
Yd = (Float64)[]
nr = 20
for dist = 0.5:0.5:5.0
    println("Fitting with methylation profile approx dist = ",dist)
    for _ = 1:nr
        A,P,c = generate_reads(N=100, W=30, L=100, Cprob=[0.5,0.5], targ_dist=dist);
        X,Y,ch = fit_soft(A,2)
        xd,yd = MethylClust.eval_synthetic_results(X,Y,c,P;make_plots=false)
        push!(d,MethylClust.d_js_multivar_bern(vec(P[1,:]),vec(P[2,:])))
        push!(Xd,xd)
        push!(Yd,yd)
        print("*")
    end
    println()
end
    
    
In [17]:
    
figure()
plot(d,Xd,".k")
xlabel("Jensen-Shannon Distance Between Cell Types")
ylabel("Proportion Misclassified")
savefig("./figs/02_05.svg")
    
    
In [18]:
    
figure()
plot(d,Yd,".k")
xlabel("Jensen-Shannon Distance Between Cell Types")
ylabel("Reconstruction Error")
savefig("./figs/02_06.svg")
    
    
In [ ]: