In [87]:
from collections import namedtuple
from itertools import cycle
from os.path import join, exists
from os import makedirs, chdir, getcwd, fchmod
from glob import glob
import shutil
import time
import numpy as np
import pandas as pd
import re
import scipy as sc
from scipy.stats import norm
from scipy.stats import expon
import scipy.interpolate as interpolate
import scipy.integrate
from scipy.fftpack import fft, ifft
from lmfit.models import Model
from lmfit import conf_interval
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
from Bio.Seq import MutableSeq
from Bio.Alphabet import IUPAC
import docker
import configparser
import seaborn as sns
sns.set_context("paper")
In [44]:
def create_mount_run(image,mount_dir,cmd,envs):
    container = cli.create_container(
        image=image, command=cmd, volumes=['/mnt/vol'],
        host_config=cli.create_host_config(binds={
            mount_dir: {
                'bind': '/mnt/vol',
                'mode': 'rw',
            }
        }),
        environment=envs
    )
    ctr = container.get('Id')
    cli.start(ctr)
    cli.wait(ctr, 60*60*24*10)  # time out after 10 days
    return cli.logs(ctr)
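A minimal usage sketch, assuming the docker-py client and env file set up as in the cells further down; the `ls /mnt/vol` command is only a placeholder:
cli = docker.Client(base_url='unix://var/run/docker.sock')
envs = docker.utils.parse_env_file(join('/Users/Shared/CommTest','env'))
print create_mount_run('centos/hedani','/Users/Shared/CommTest','ls /mnt/vol',envs)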
In [45]:
def Gekv(B,C):
    return 2**(1 - B)/(C*np.log(2))*(1 - 2**(-C))
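A quick numeric sanity check (my addition): since (1 - 2^-C)/(C ln 2) tends to 1 as C tends to 0, Gekv(B, C) should approach 2^(1 - B) for small C:
# As C -> 0, (1 - 2**-C)/(C*log(2)) -> 1, so Gekv(B, C) -> 2**(1 - B)
for C in [1e-6, .1, .5, 1.]:
    print C, Gekv(.2, C), 2**(1 - .2)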
In [47]:
class Community:
    """A community defined by genome references (Biopython SeqRecords) and corresponding growth parameters."""
    def __init__(self,name,acc,growth_param,td,mapper,env=None):
        self.name = name
        self.td = td
        self.conf = local_conf(join(td,name),mapper)
        self.d_conf = local_conf(join('/mnt/vol',name),mapper)
        self.args0 = ['/home/Programs/PTR-Pipeline/ptr_pipeline.py','-c',
                      join(self.d_conf['node_path'],'project.conf'),
                      '-e','hedani@chalmers.se']
        self.env = env
        self.create_dirs(True)
        save_config(self.conf,self.d_conf)
        self.fetch_ref(acc)
        self.pop = self.init_population(acc,growth_param)
        self.distribution = self.community_distribution()
        self.samples = []

    def init_population(self,acc,growth_param):
        records = open_records(glob(join(self.conf['ref_path'],'Fasta','*.fasta')))
        population = namedtuple("population", "B C D l seq cells")
        pop = {}
        # add [:-2] to a if the accession version suffix (.) is removed
        for i,a in enumerate(acc):
            pop[a] = population(B=growth_param[i][0], C=growth_param[i][1],
                                D=growth_param[i][2], l=len(records[a]),
                                seq=records[a], cells=growth_param[i][3])
        return pop

    def ptr(self):
        for a in self.pop.keys():
            print a+": "+str(2**self.pop[a].C)

    def community_distribution(self):
        d = np.array([Gekv(p.B,p.C)*p.l*p.cells for p in self.pop.values()])
        return d/d.sum()

    def sample(self,nr_samples):
        nr_samples = np.array(nr_samples*self.distribution).astype(np.int)
        samp_run = []
        for i,p in enumerate(self.pop.keys()):
            samp_run.append(inverse_transform_sampling(self.pop[p].C,nr_samples[i],self.pop[p].l))
        self.samples.append(samp_run)

    def write_reads(self):
        for i,samp in enumerate(self.samples):
            write_reads(samp,self,self.conf['data_path'],self.name+str(i))

    def compare_fit(self):
        if not self.samples:
            print "The community is not sampled, please run community.sample(nr_samples)"
            return
        err_hfit = []
        err_pfit = []
        res_fit = []
        for i,samp in enumerate(self.samples):
            for acc in self.pop.keys():
                try:
                    depth_file = join(self.conf['output_path'],self.name+str(i),'npy',acc+'.depth.npy')
                    best_file = join(self.conf['output_path'],self.name+str(i),'npy',acc+'.depth.best.npy')
                    signal = 2**(np.load(depth_file))
                    signal = signal/(signal.sum()/len(signal))
                    from_ptr = np.load(best_file)
                    res = fit_signal(signal,self.pop[acc].l)
                    res_fit.append(res)
                    err_hfit.append((self.pop[acc].C-res.best_values['C'])/self.pop[acc].C)
                    err_pfit.append((self.pop[acc].C-from_ptr[2]+from_ptr[3])/self.pop[acc].C)
                    print "Simulated value: "+str(self.pop[acc].C)
                    print "Error from this fit: "+str(err_hfit[-1])+' value: '+str(res.best_values['C'])
                    print "Error from initial PTR fit: "+str(err_pfit[-1])+' value: '+str(from_ptr[2]-from_ptr[3])
                except Exception as Ex:
                    print Ex
        return [res_fit,err_hfit,err_pfit]

    def fetch_ref(self,acc=''):
        acc_path = join(self.conf['node_path'],"acc")
        f = open(acc_path, "w")
        if not acc:
            f.write("\n".join(self.pop.keys()))
        else:
            f.write("\n".join(acc))
        f.close()
        create_mount_run('centos/hedani',self.td,self.args0+['fetch-references','-s',join(self.d_conf['node_path'],'acc')],self.env)

    def build_index(self):
        create_mount_run('centos/hedani',self.td,self.args0+['build-index'],self.env)

    def run_pipeline(self):
        create_mount_run('centos/hedani',self.td," ".join(['/bin/bash -c "']+self.args0+['make']+[';']+self.args0+['run"']),self.env)

    def collect(self):
        create_mount_run('centos/hedani',self.td,self.args0+['collect'],self.env)

    def create_dirs(self,from_init=False):
        if exists(self.conf['node_path']) and from_init:
            shutil.rmtree(self.conf['node_path'])
        for d in [self.conf['node_path'],self.conf['data_path'],self.conf['output_path'],self.conf['ref_path']]:
            if not exists(d):
                makedirs(d)
In [48]:
import numpy as np

def cartesian(arrays, out=None):
    """
    Generate a cartesian product of input arrays.

    Parameters
    ----------
    arrays : list of array-like
        1-D arrays to form the cartesian product of.
    out : ndarray
        Array to place the cartesian product in.

    Returns
    -------
    out : ndarray
        2-D array of shape (M, len(arrays)) containing cartesian products
        formed of input arrays.

    Examples
    --------
    >>> cartesian(([1, 2, 3], [4, 5], [6, 7]))
    array([[1, 4, 6],
           [1, 4, 7],
           [1, 5, 6],
           [1, 5, 7],
           [2, 4, 6],
           [2, 4, 7],
           [2, 5, 6],
           [2, 5, 7],
           [3, 4, 6],
           [3, 4, 7],
           [3, 5, 6],
           [3, 5, 7]])
    """
    arrays = [np.asarray(x) for x in arrays]
    dtype = arrays[0].dtype
    n = np.prod([x.size for x in arrays])
    if out is None:
        out = np.zeros([n, len(arrays)], dtype=dtype)
    m = n // arrays[0].size
    out[:,0] = np.repeat(arrays[0], m)
    if arrays[1:]:
        cartesian(arrays[1:], out=out[0:m,1:])
        for j in xrange(1, arrays[0].size):
            out[j*m:(j+1)*m,1:] = out[0:m,1:]
    return out
In [49]:
def pxn(x, C, l):
    return (2**(1 + C - (2*C*x)/float(l))*C*np.log(2))/(float(l)*(-1 + 2**C))
In [50]:
def pxn2(x, C):
    return (2**(1 + C - 2*C*x)*C*np.log(2))/(-1 + 2**C)
In [51]:
def piecewise_prob(x,C,l):
    conds = [(0 < x) & (x <= float(l)/2), x > float(l)/2]
    funcs = [lambda x: pxn(x,C,l)/float(2), lambda x: pxn((l-x),C,l)/float(2)]
    return np.piecewise(x,conds,funcs)
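A sanity check (my addition): pxn integrates to 1 over (0, l/2), so the halved and mirrored piecewise_prob should integrate to 1 over the whole genome (0, l):
l, C = 1000., .6
x = np.linspace(1e-6, l - 1e-6, 10**5)
print np.trapz(piecewise_prob(x, C, l), x)  # ~1.0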
In [52]:
def piecewise_prob2(x,C):
    conds = [(0 < x) & (x <= .5), x > .5]
    funcs = [lambda x: pxn2(x,C)/float(2), lambda x: pxn2((1-x),C)/float(2)]
    return np.piecewise(x,conds,funcs)
In [53]:
def inverse_transform_sampling(C,n_samples,l=1):
    # numerically invert the CDF of pxn on (0, 1/2), then mirror half the draws to (1/2, 1)
    x = np.linspace(0, float(1)/1.7, 100)
    pdf = interpolate.interp1d(x,pxn(x,C,1))
    cdf = [scipy.integrate.quad(lambda x: pdf(x),0,i)[0] for i in np.linspace(0,.5,100)]
    inv_cdf = interpolate.interp1d(cdf, np.linspace(0,.5,100))
    r = np.random.rand(n_samples)
    v = l*np.round(inv_cdf(r[:int(n_samples/2)]),4)
    v2 = l*(1-np.round(inv_cdf(r[int(n_samples/2):]),4))
    v = np.concatenate([v,v2])
    return v.astype(np.int)
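A usage sketch (my addition): draw read positions for C = .6 on a genome of length 1000 and compare the empirical position frequencies with piecewise_prob:
samp = inverse_transform_sampling(.6, 10**5, 1000)
h = np.bincount(samp)/float(len(samp))
x = np.linspace(1, 999, 200)
plt.plot(x, piecewise_prob(x, .6, 1000.))
plt.plot(h, marker='.', linestyle='None')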
In [54]:
def read_pair(header,ind,seq,pos,l):
    """Yield a 100 bp mate pair starting at pos on a circular genome of length l (fixed insert size 500)."""
    def circular_yield(x,pos,sub,l):
        # wrap reads that run off the end of the (circular) reference
        if pos > l:
            return x[pos-l:pos-l+sub]
        elif pos+sub > l:
            r = pos+sub-l
            return x[pos:pos+sub-r]+x[0:r]
        else:
            return x[pos:pos+sub]
    # per-base sequencing errors/mutations and a normally distributed insert size
    # (norm.rvs(loc=300,scale=30)) were tried here but are disabled
    insert_size = 500
    r1 = circular_yield(seq,pos,100,l)
    r2 = circular_yield(seq,pos+insert_size,100,l)
    rec1 = SeqRecord(r1,id=header+"_"+str(pos)+"."+str(ind),
                     description="RANDXXLAB"+str(pos)+"_"+str(pos+insert_size+100)+":0:0"+"/1")
    rec2 = SeqRecord(r2,id=header+"_"+str(pos)+"."+str(ind),
                     description="RANDXXLAB"+str(pos)+"_"+str(pos+insert_size+100)+":0:0"+"/2")
    rec2 = rec2.reverse_complement(id=header+"_"+str(pos)+"."+str(ind),
                                   description="RANDXXLAB"+str(pos)+"_"+str(pos+insert_size+100)+":0:0"+"/2")
    rec1.letter_annotations['phred_quality'] = [17]*100
    rec2.letter_annotations['phred_quality'] = [17]*100
    return (rec1,rec2)
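A toy example (my addition) on a short mock circular sequence; the real calls in write_reads below pass full genome records:
from Bio.Seq import Seq
toy = Seq("ACGT"*300)  # 1200 bp mock genome
r1, r2 = read_pair("toy", "1", toy, 1100, len(toy))
print r1.id, len(r1.seq), len(r2.seq)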
In [55]:
def open_records(fasta):
    records = {}
    for f in fasta:
        handle = open(f, "rU")
        tmp = list(SeqIO.parse(handle, "fasta"))
        m = re.match('.*(N[CTZ]_([A-Z]{2})*[0-9]{6}).*',tmp[0].id)
        records[m.group(0)] = tmp[0].seq
        handle.close()
    return records
In [56]:
def write_reads(samples,comm,directory,name):
    f1 = open(join(directory,name+"_1.fastq"), "w")
    f2 = open(join(directory,name+"_2.fastq"), "w")
    for i,p in enumerate(comm.pop.keys()):
        for j,pos in enumerate(samples[i].tolist()):
            r1,r2 = read_pair(name+"_"+p,str(j+1),comm.pop[p].seq,pos,comm.pop[p].l)
            SeqIO.write(r1, f1, "fastq")
            SeqIO.write(r2, f2, "fastq")
    f1.close()
    f2.close()
In [57]:
def plot_samples(samples):
    y = [np.bincount(s) for s in samples]
    for p in y:
        plt.plot(p/float(p.sum()),marker='.',linestyle='None')
In [58]:
def local_conf(td,mapper):
    return {
        'project': 'ptr_simulation',
        'cluster': '',
        'job_name': 'ptr_simulation',
        'job_nodes': '1',
        'cpu_cores': '4',
        'estimated_time': '',
        'node_path': td,
        'ref_path': join(td,'References'),
        'data_path': join(td,'Data'),
        'output_path': join(td,'Out'),
        'doric_path': join(td,'DoriC'),
        'mapper': mapper,
        'ref_name': 'sim',
        'nr_samples': '1',
        'samples_per_node': '1',
        'email': 'hedani@chalmers.se',
        'data_prefix': '',
        'start_ind': '1',
        'job_range': '1',
        'ftp_url': '',
        'data_url': ''
    }
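For reference (my addition), this mirrors the call used further down to build a config for one community directory:
conf = local_conf(join('/Users/Shared/CommTest','comm1'),'bowtie2')
print conf['ref_path']  # /Users/Shared/CommTest/comm1/References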
In [59]:
def run_community_analysis(exec_string,td,mapper,acc,growth_param,community,nr_samples,topname,envs=None):
    comm = []
    docker_td = '/mnt/vol'
    for i,c in enumerate(community):
        chdir(td)
        accs = [acc[x] for x in c]
        grs = [growth_param[x] for x in c]
        wd = 'comm_'+str(topname)+str(i)
        docker_fd = join(docker_td,wd)
        fd = join(td,wd)
        # create dirs
        if 'd' in exec_string:
            if exists(fd):
                shutil.rmtree(fd)
            for d in [wd,join(wd,'Data'),join(wd,'Out')]:
                if not exists(d):
                    makedirs(d)
        chdir(wd)
        lconf = local_conf(fd,mapper)
        conf = local_conf(docker_fd,mapper)
        save_config(lconf,conf)
        args0 = ['/home/Programs/PTR-Pipeline/ptr_pipeline.py','-c',join(conf['node_path'],'project.conf'),'-e','hedani@chalmers.se']
        if 'f' in exec_string:
            f = open("acc", "w")
            f.write("\n".join(accs))
            f.close()
            create_mount_run('centos/hedani',td,args0+['fetch-references','-s',join(conf['node_path'],'acc')],envs)
        if 's' in exec_string:
            # create community and sample
            comm.append(Community(wd,accs,grs,td,'bowtie2',envs))
            comm[i].sample(nr_samples)
            comm[i].write_reads()
        if 'b' in exec_string: create_mount_run('centos/hedani',conf['node_path'],args0+['build-index'],envs)
        if 'm' in exec_string: create_mount_run('centos/hedani',conf['node_path'],args0+['make'],envs)
        if 'r' in exec_string: create_mount_run('centos/hedani',conf['node_path'],args0+['run'],envs)
    return comm
In [60]:
def fit_signal(signal,l):
    x1 = np.linspace(0,1,len(signal))
    piecewiseModel = Model(piecewise_prob)
    piecewiseModel.set_param_hint('l', value=1, vary=False)
    piecewiseModel.set_param_hint('C', value=.1, vary=True, min=0, max=1)
    piecewiseModel.make_params()
    res = piecewiseModel.fit(signal,x=x1,weights=np.sin(1.5*x1)+1.5)
    return res
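Usage sketch (my addition), assuming `signal` is a normalized coverage vector as built in the cells further down:
res = fit_signal(signal, 1)
print res.best_values['C']
print res.fit_report()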
In [61]:
def save_config(lconf,conf):
    Config = configparser.ConfigParser()
    Config.optionxform = str
    cfgfile = open(join(lconf['node_path'],'project.conf'),'w')
    Config.add_section('Project')
    Config.set('Project','ProjectID',conf['project'])
    Config.set('Project','Cluster',conf['cluster'])
    Config.set('Project','JobName',conf['job_name'])
    Config.set('Project','JobNodes',conf['job_nodes'])
    Config.set('Project','CpuCores',conf['cpu_cores'])
    Config.set('Project','EstimatedTime',conf['estimated_time'])
    Config.add_section('Directories')
    Config.set('Directories','Node',conf['node_path'])
    Config.set('Directories','References',conf['ref_path'])
    Config.set('Directories','Data',conf['data_path'])
    Config.set('Directories','Output',conf['output_path'])
    Config.set('Directories','DoriC',conf['doric_path'])
    Config.add_section('Other')
    Config.set('Other','Mapper',conf['mapper'])
    Config.set('Other','RefName',conf['ref_name'])
    Config.set('Other','NrSamples',conf['nr_samples'])
    Config.set('Other','SamplesPerNode',conf['samples_per_node'])
    Config.set('Other','Email',conf['email'])
    Config.set('Other','DataPrefix',conf['data_prefix'])
    Config.set('Other','StartInd',conf['start_ind'])
    Config.set('Other','JobRange',conf['job_range'])
    Config.set('Other','FtpURL',conf['ftp_url'])
    Config.set('Other','DataURL',conf['data_url'])
    Config.write(cfgfile)
    cfgfile.close()
In [62]:
def mutation_run():
    # expects rec and mutable_seq from the cells below; writes to sys.argv[2]
    rec.seq = mutateSeq(mutable_seq,.01)
    rec.id = "mut_"+rec.id
    rec.description = "mut_"+rec.description
    rec.name = "mut_"+rec.name
    with open(sys.argv[2], "w") as output_handle:
        SeqIO.write(rec, output_handle, "fasta")
In [75]:
def compare_fit(c):
    err_hfit = []
    err_pfit = []
    res_fit = []
    for i,samp in enumerate(c.samples):
        res_fit_tmp = []
        err_hfit_tmp = []
        err_pfit_tmp = []
        for acc in c.pop.keys():
            try:
                depth_file = join(c.conf['output_path'],c.name+str(i),'npy',acc+'.depth.npy')
                best_file = join(c.conf['output_path'],c.name+str(i),'npy',acc+'.depth.best.npy')
                signal = 2**(np.load(depth_file))
                signal = signal/(signal.sum()/len(signal))
                # extra deblurring
                signal = smooth_signal(signal,10**3)
                signal = sharpen_filter(signal)
                from_ptr = np.load(best_file)
                res = fit_signal(signal,c.pop[acc].l)
                res_fit_tmp.append(res.best_values['C'])
                err_hfit_tmp.append((c.pop[acc].C-res.best_values['C'])/c.pop[acc].C)
                err_pfit_tmp.append((c.pop[acc].C-from_ptr[2]+from_ptr[3])/c.pop[acc].C)
            except Exception as Ex:
                print Ex
        res_fit.append(res_fit_tmp)
        err_hfit.append(err_hfit_tmp)
        err_pfit.append(err_pfit_tmp)
    return [res_fit,np.abs(err_hfit),np.abs(err_pfit)]
In [64]:
def compare_fit_ptrc(c):
    ptrc = []
    ptre = []
    for i,samp in enumerate(c.samples):
        ptrc_tmp = []
        ptre_tmp = []
        cmd = ['/bin/bash -c "']+['cd /mnt/vol ; /home/Programs/PTR-Pipeline/extra/ptrc_commands.sh']+[c.name+str(i)+'_1.fastq']+[c.name+str(i)+'_2.fastq"']
        create_mount_run('centos/hedani',td," ".join(cmd),envs)
        ptr_file = join(c.conf['output_path'],'ptrc_out')
        ptrc_frame = pd.read_pickle(ptr_file)
        for ind in ptrc_frame.index.values:
            ptrc_tmp.append(ptrc_frame.loc[ind]['sm'])
            ptre_tmp.append(np.abs(np.log2(ptrc_frame.loc[ind]['sm'])-c.pop[ind[:-1]].C)/c.pop[ind[:-1]].C)
        ptrc.append(ptrc_tmp)
        ptre.append(ptre_tmp)
    return [np.log2(ptrc),ptre]
In [65]:
def mv_filt(L,omega):
    """Frequency response of an L-point moving-average filter."""
    return (1/float(L))*(1-np.exp(-omega*1j*L))/(1-np.exp(-omega*1j))
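A quick check (my addition): for integer L the magnitude response should be ~1 at DC and vanish at the first null, omega = 2*pi/L:
print np.abs(mv_filt(20, 1e-9))        # ~1 at DC
print np.abs(mv_filt(20, 2*np.pi/20))  # ~0 at the first null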
In [66]:
def smooth_signal(signal,length):
    xi = np.linspace(0,1,len(signal))
    xi2 = np.linspace(0,1,length)
    iss = scipy.interpolate.UnivariateSpline(xi,signal,k=3)
    iss.set_smoothing_factor(.00001)
    return iss(xi2)
In [67]:
def sharpen_filter(signal):
    # deconvolve the residual moving-average blur by dividing in the frequency domain
    x = np.linspace(-0.00001*np.pi, np.pi, 10**3)
    st = ifft(fft(signal,10**3)/mv_filt(1.27,x))
    return np.roll(st[0:len(signal)], np.int64(-125/2))
In [68]:
def triangle(length, amplitude, y_offset):
    section = length // 4
    for direction in (1, -1):
        for i in range(section):
            yield y_offset + i * (amplitude / section) * direction
        for i in range(section):
            yield y_offset + (amplitude - (i * (amplitude / section))) * direction
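Usage sketch (my addition): materialize one period and plot it, as in the smoothing experiments further down:
y = list(triangle(1000, 0.1, 10))
plt.plot(y)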
In [ ]:
df = pd.DataFrame([[1, 2], [3, 4]], columns=list('AB'))
df2 = pd.DataFrame([[5, 6], [7, 8]], columns=list('AB'))
df.append(df2)
In [ ]:
real_c=np.tile([growth_param[sel[0][1]][1],growth_param[sel[0][0]][1]],(5,1))
ptrc_e=np.abs(np.log2(ptrc)-real_c)/real_c
In [31]:
create_mount_run('centos/hedani',td," ".join(cmd),envs)
Out[31]:
In [747]:
cmd
Out[747]:
In [29]:
cmd=['/bin/bash -c "']+['cd /mnt/vol ; /home/Programs/PTR-Pipeline/extra/ptrc_commands.sh']+[c.name+str(0)+'_1.fastq']+[c.name+str(0)+'_2.fastq"']
In [30]:
" ".join(cmd)
Out[30]:
In [35]:
ptrc=pd.read_pickle(ptr_file)
In [36]:
ptrc.index.values
Out[36]:
In [34]:
ptr_file=join(c.conf['output_path'],'ptrc_out')
In [716]:
ptrc.loc['Escherichia coli\n']['sm']
Out[716]:
In [662]:
ptrc=compare_fit_ptrc(c)
In [69]:
acc=['NC_000913.3','NC_007779.1','NC_002655.2','NC_009614.1','NC_017218.1']
growth_param=[[.2, .5, .3, 100],[.1, .7, .2, 200],[.2, .8, .0001,250],[0.0001, .8, .2, 250],[.1, .7, .2,150]]
sel=[[0]]
td='/Users/Shared/CommTest'
In [70]:
cli = docker.Client(base_url='unix://var/run/docker.sock')
envs = docker.utils.parse_env_file(join(td,'env'))
In [71]:
totH = []
totK = []
for i in range(2):
    c = Community('comm0',[acc[s] for s in sel[0]],[growth_param[s] for s in sel[0]],td,'bowtie2',envs)
    c.build_index()
    tot_reads = np.linspace(5*10**3,10**4,2)
    for nr in tot_reads:
        c.sample(nr)
        c.write_reads()
        totK.append(compare_fit_ptrc(c))
        c.run_pipeline()
        totH.append(compare_fit(c))
In [85]:
totH[0]
Out[85]:
In [88]:
tips = sns.load_dataset("tips")
In [726]:
[k_c,k_err]=compare_fit_ptrc(c)
In [708]:
plt.plot(tot_reads,np.mean(hfit,1),tot_reads,np.mean(ptrc_e,1))#,tot_reads,[r[0].best_values['C'],r[1].best_values['C'] for r in res])
legend = plt.legend(['H','K'])
#ax = plt.axes()
#ax.set_title('Mean absolute error')
#g.fig.suptitle('THIS IS A TITLE, YOU BET')
In [ ]:
sns_plot.savefig("output.pdf")
In [280]:
args0=['/home/Programs/PTR-Pipeline/ptr_pipeline.py','-c',join('/mnt/vol','comm0','project.conf'),'-e','hedani@chalmers.se']
create_mount_run('centos/hedani',td," ".join(['/bin/bash -c "']+args0+['make']+[';']+args0+['run"']),envs)
Out[280]:
In [507]:
res=fit_signal(st,1)
err_hfit=((.5-res.best_values['C'])/.5)
err_hfit
Out[507]:
In [464]:
res.best_values['C']
Out[464]:
In [496]:
#depth_file=join(c.conf['output_path'],c.name+'0','npy',c.pop.keys()[0]+'.depth.npy')
depth_file='/Users/Shared/CommTest/comm0/Out/comm00/npy/NC_000913.3.depth.npy'
In [508]:
signal=2**np.load(depth_file)
signal=signal/(signal.sum()/len(signal))
In [509]:
plt.plot(signal)
Out[509]:
In [429]:
x = np.linspace(-2*np.pi, 2*np.pi, len(signal))
In [339]:
xx=np.linspace(-np.pi, np.pi, 1000)
plt.plot(np.abs(mv_filt(1.2,xx)))
Out[339]:
In [401]:
y2=[]
x2=np.linspace(0,.5,100)
y2=np.append(pxn2(x2,.5),pxn2(x2,.5)[::-1])
x2=np.linspace(0,1,200)
y=list(triangle(1000, 0.1,10))
x=range(len(y))
x_p = np.linspace(-2*np.pi, 2*np.pi, len(y2))
f=mv_filt(20,x_p)
fft_y=fft(y2)*f
smooth_y=np.roll(ifft(fft_y),0)
smooth_y=smooth_y/np.sum(smooth_y)
x3=np.linspace(0,1,len(y_long))
y_skum=y_long/(np.sum(y_long+np.mean(y_long)))
x_p2 = np.linspace(-2*np.pi, 2*np.pi, len(y_skum))
plt.plot(x2,y2/np.sum(y2),x2,smooth_y,x2,ifft(fft(smooth_y)/f))
Out[401]:
In [505]:
xi=np.linspace(0,1,len(signal))
xi2=np.linspace(0,1,10**3)
iss=scipy.interpolate.UnivariateSpline(xi,signal,k=3)
iss.set_smoothing_factor(.00001)
y_long=iss(xi2)
plt.plot(y_long)
Out[505]:
In [208]:
plt.plot(np.abs(ifft(fft(signal,10**4))[0:len(signal)]))
Out[208]:
In [512]:
signal=y_long
x = np.linspace(-0.00001*np.pi/float(1), 1*np.pi/float(1),10**3)
st=np.roll(ifft(fft(signal,10**3)/mv_filt(1.27,x))[0:len(signal)],np.int64(-125/2))
x2=np.linspace(0,1,len(st))
x1=np.linspace(0,1,len(signal))
plt.plot(x2,st/np.sum(st),x1,signal/np.sum(signal))
Out[512]:
In [514]:
plt.plot(x2,st/np.sum(st),x1,signal/np.sum(signal))
Out[514]:
In [193]:
plt.plot(np.abs(mv_filt(1*10**1,x)))
Out[193]:
In [411]:
np.sqrt(-1)
Out[411]:
In [ ]:
1+np.exp(1)
In [398]:
np.exp(1)
Out[398]:
In [272]:
create_mount_run('centos/hedani',td," ".join(['/bin/bash -c "']+['which python"']),envs)
Out[272]:
In [52]:
tot_reads = [10**5]  # np.linspace(5*10**3,10**4,15)
c = Community('comm0',[acc[s] for s in sel[0]],[growth_param[s] for s in sel[0]],td,'bowtie2')
c.build_index()
for nr in tot_reads:
    c.sample(nr)
    c.write_reads()
    c.run_pipeline()
    [res,hfit,pfit] = c.compare_fit()
In [247]:
community = []
nrs = np.linspace(10**5,10**5,1)
res = []
for nr in nrs:
    community.append(run_community_analysis('fs',td,'bowtie2',acc,growth_param,sel,nr,0))
    #res.append(community[i][0].compare_fit())
In [73]:
c=Community('comm0',[acc[s] for s in sel[0]],[growth_param[s] for s in sel[0]],td,'bowtie2')
In [47]:
tot_reads=[10**5]#np.linspace(5*10**3,10**4,15)
c=Community('comm0',[acc[s] for s in sel[0]],[growth_param[s] for s in sel[0]],td,'bowtie2')
c.build_index()
In [2132]:
n = 130
D1_snok = np.random.randint(30,40); C1_snok = np.random.randint(40,70); tau1_snok = np.random.randint(130,250,n)
#tau1_snok[-1] = 50
D2_snok = 10.; C2_snok = 35.; tau2_snok = np.random.randint(60,200,n)
ab1 = [np.random.rand() for x in range(n)]; ab2 = np.random.randint(100,200,n)
org1 = [[np.max([1-(C1_snok+D1_snok)/float(tau1_snok[i]),0]), C1_snok/float(tau1_snok[i]),
         D1_snok/float(tau1_snok[i]), ab1[i]] for i in range(tau1_snok.size)]
org2 = [[1-(C2_snok+D2_snok)/float(tau2_snok[i]), C2_snok/float(tau2_snok[i]),
         D1_snok/float(tau2_snok[i]), ab2[i]/float(ab1[i])] for i in range(tau2_snok.size)]
#same_org[0][0] = 0
#same_org[0][0]=0
In [2133]:
print D1_snok,C1_snok
In [2134]:
same_org=np.concatenate((org1,org2))
In [2135]:
Ri1
Out[2135]:
In [2136]:
# Gekv2: variant of Gekv used in these cells (defined elsewhere in the project)
Ri1 = np.array([Gekv2(p[1],p[2])*1*p[3] for p in org1])
rs = np.random.randint(2,3,n)
for i in range(n):
    Ri1[i] = Ri1[i]/rs[i]
    org1[i][3] = org1[i][3]/rs[i]
Ki1 = np.array([(2**(p[1])-1)/(float(p[1])*np.log(2)) for p in org1])
Ri2 = np.array([Gekv2(p[1],p[2])*1*p[3] for p in org2])
Ki2 = np.array([(2**(p[1])-1)/(float(p[1])*np.log(2)) for p in org2])
In [2137]:
org1
Out[2137]:
In [1887]:
np.array([Gekv2(p[1],p[2])*1*p[3] for p in org1])
Out[1887]:
In [1888]:
[2**(p[2])*((2**(p[1])-1)/(float(p[1])*np.log(2)))*1*p[3]/400. for p in same_org]
Out[1888]:
In [939]:
cip
Out[939]:
In [546]:
[c[1]/float(c[0]) for c in cip]
Out[546]:
In [545]:
[t[0]/float(t[1]) for t in tip]
Out[545]:
In [2138]:
Rip1=create_pairs(Ri1)
Kip1=create_pairs(Ki1)
cip1=create_pairs([p[1] for p in org1])
tip1=create_pairs([t for t in tau1_snok])
Aip1=create_pairs([p[3] for p in org1])
Rip2=create_pairs(Ri2)
Kip2=create_pairs(Ki2)
cip2=create_pairs([p[1] for p in org2])
tip2=create_pairs([t for t in tau2_snok])
Aip2=create_pairs([p[3] for p in org2])
inde=np.triu_indices(len(Ri1),k=1)
#Aa=np.zeros(len(Rip))
#for i in len(cip):
# Aa[i]=A_snok(Rip[i],Kip[i],[1,1],cip[i])
#M=np.zeros(len(sel[0]))
#for i in len(arrC):
# M[i][j]=1-
In [1940]:
cip
Out[1940]:
In [2139]:
M1 = np.zeros((len(Rip1),len(Ri1)))
b1 = np.zeros(len(Rip1))
for i in range(len(inde[0])):
    cc = cip1[i][1]/cip1[i][0]
    M1[i][inde[0][i]] = cc
    M1[i][inde[1][i]] = -1
    b1[i] = cc*np.log2(Rip1[i][0])-np.log2(Rip1[i][1])+np.log2(Kip1[i][1])-cc*np.log2(Kip1[i][0])
In [2140]:
M2 = np.zeros((len(Rip2),len(Ri2)))
b2 = np.zeros(len(Rip2))
for i in range(len(inde[0])):
    cc = cip2[i][1]/cip2[i][0]
    M2[i][inde[0][i]] = cc
    M2[i][inde[1][i]] = -1
    b2[i] = cc*np.log2(Rip2[i][0])-np.log2(Rip2[i][1])+np.log2(Kip2[i][1])-cc*np.log2(Kip2[i][0])
In [1807]:
for i in range(3):
    print (Rip1[i][0]/Kip1[i][0])**(cip1[i][1]/cip1[i][0])*Kip1[i][1]/Rip1[i][1]
In [1674]:
for i in range(3):
    print (Aip1[i][0]**(cip1[i][1]/cip1[i][0]))/Aip1[i][1]
In [1505]:
for i in range(3):
    print cip1[i][1]/cip1[i][0]
In [881]:
print Kip1[1]
print Rip1[1]
#print cip1[]
dd
Out[881]:
In [870]:
print b1
In [738]:
Z1 = np.zeros_like(M1)
Z2 = np.zeros_like(M2)
B = np.ones(3*2)
T = np.bmat([[M1,Z1],[Z2,M2]])
T = np.vstack((T,B))
b = np.vstack((b1,b2))
#b_sum = np.sum([-2/float(3)*x for x in np.array(org1)[:,1]]+Ri1-Ki1)
#b_sum = b_sum+np.sum([-2/float(3)*x for x in np.array(org2)[:,1]]+Ri2-Ki2)
b_sum = np.sum(-np.array(org1)[:,2]+Ri1-Ki1)
b_sum = b_sum+np.sum(-np.array(org2)[:,2]+Ri2-Ki2)
b = np.append(b,b_sum)
In [713]:
aa=np.array(org1)[:,1]
In [726]:
b_sum=np.sum([-2/float(3)*x for x in np.array(org1)[:,1]]+Ri1-Ki1)
b_sum=b_sum+np.sum([-2/float(3)*x for x in np.array(org2)[:,1]]+Ri2-Ki2)
In [727]:
b_sum
Out[727]:
In [761]:
dd
Out[761]:
In [511]:
sort_cip = [0]*(len(cip)*len(cip))
for i in range(len(cip)):
    for j in range(len(cip)):
        sort_cip[i*len(cip)+j] = cip[i][1]/cip[i][0]*cip[j][1]/cip[j][0]
sort_cip = np.sort(sort_cip)
In [506]:
F=np.vstack((M[0],M[5],M[9],M[12],M[13],M[14]))
In [1675]:
for i in range(len(cip1)):
    print Aip1[i][1]*Kip1[i][1]/Rip1[i][1]-(Aip1[i][0]*Kip1[i][0]/Rip1[i][0])**(cip1[i][1]/cip1[i][0])
In [1943]:
for i in range(len(cip1)):
    print (cip1[i][1]/cip1[i][0])*np.log2(org1[inde[0][i]][3])-np.log2(org1[inde[1][i]][3])
    print b1[i]
In [877]:
np.log2(dd)
Out[877]:
In [1508]:
dd=np.array([p[3] for p in org1])
dd
Out[1508]:
In [879]:
print M1
In [628]:
tau_snok[0]/float(tau_snok[1])
Out[628]:
In [631]:
tau_snok[0]/float(tau_snok[2])
Out[631]:
In [632]:
tau_snok[1]/float(tau_snok[2])
Out[632]:
In [1509]:
viaCellDist=np.dot(M1,np.log2(dd).T)
viaCellDist
Out[1509]:
In [1510]:
b1
Out[1510]:
In [797]:
print b1
In [613]:
cip[1][1]/cip[1][0]
Out[613]:
In [621]:
tau_snok[2]/float(tau_snok[1])
Out[621]:
In [618]:
tau_snok[1]
Out[618]:
In [612]:
-cip[1][1]/cip[1][0]+cip[0][1]/cip[0][0]*cip[2][1]/cip[2][0]
Out[612]:
In [614]:
cip
Out[614]:
In [583]:
cip2=create_pairs([p[1] for p in same_org])
cip2
Out[583]:
In [578]:
inde
Out[578]:
In [560]:
yy=np.linalg.solve(M[0:6],b.T[0:6])
In [ ]:
Mt
In [1927]:
print np.linalg.pinv(Mt)
In [1928]:
np.linalg.norm(np.linalg.pinv(Mt))*np.linalg.norm(Mt)
Out[1928]:
In [497]:
inde
Out[497]:
In [489]:
.43*.63-.55*.49
Out[489]:
In [1892]:
print D1_snok
print C1_snok
print tau1_snok
In [2116]:
M1.shape
Out[2116]:
In [2142]:
np.linalg.norm(np.linalg.pinv(Mt))*np.linalg.norm(Mt)
Out[2142]:
In [2249]:
Mt = M1
bt = b1
Mt = np.append(Mt,[np.zeros(n)], axis=0)
Mt[-1][-1] = 1
bt = np.append(bt,[0], axis=0)
bt[-1] = np.log2(Ri1[-1])-np.log2(Ki1[-1])-org1[-1][2]
#bt[-1] = np.mean(np.log2(np.array(Rip1)[:,0])-np.log2(Kip1[:,0])-np.array(cip1)[:,0])
bt[-1] = np.log2(Ri1[-1])-np.log2(Ki1[-1])-cip1[-1][0]
bt[-1] = np.log2(Ri1[-1])-np.log2(Ki1[-1])-aa1[-1]
#bt[-1] = np.log2(Ahat[-25])
bt[-1]
Out[2249]:
In [2238]:
np.abs(np.abs(np.log2(Ri1[-1])-np.log2(Ki1[-1])-org1[-1][2])-abs(bt[-1]))/np.abs(np.log2(Ri1[-1])-np.log2(Ki1[-1])-org1[-1][2])
Out[2238]:
In [2250]:
print np.log2(Ri1[-1])-np.log2(Ki1[-1])-org1[-1][2]
print np.log2(Ri1[-1])-np.log2(Ki1[-1])-org1[-1][1]
print np.log2(Ri1[-1])-np.log2(Ki1[-1])-cip1[-1][0]
print np.mean(np.log2(Ri1)-np.log2(Ki1)-np.array(org1)[:,1])
In [2240]:
log2Ahat=np.linalg.lstsq(Mt,bt.T)[0]
log2Ahat
Out[2240]:
In [2241]:
log2Ahat=np.linalg.solve(Mt,bt.T)
log2Ahat
In [2242]:
Ahat=2**log2Ahat
In [2243]:
np.abs(np.log2(dd))-np.abs(np.log2(Ri1)-np.log2(Ki1)-np.array(org1)[:,2])
Out[2243]:
In [2244]:
np.abs(np.abs(np.log2(Ri1)-np.log2(Ki1)-np.array(org1)[:,2])-abs(np.log2(Ahat)))/np.abs(np.log2(Ri1)-np.log2(Ki1)-np.array(org1)[:,2])
Out[2244]:
In [2245]:
dd=np.array([p[3] for p in org1])
#aun=np.array(Ahat)/(np.mean(Ahat)/np.mean(dd));
#aun=np.array(Ahat)/(Ahat[0]/dd[0]);
print np.abs((dd-Ahat)/dd)
print Ahat
In [2246]:
np.log2(dd)
Out[2246]:
In [2247]:
aa=np.array(org1)[:,2];
np.abs(aa-np.abs(np.log2(Ri1)-np.log2(Ki1)-np.log2(Ahat)))/aa
Out[2247]:
In [2251]:
aa1=(np.log2(Ri1)-np.log2(Ki1)-np.log2(Ahat))
In [2252]:
aa2=(np.log2(Ri1)-np.log2(Ki1)-np.log2(Ahat))
In [2254]:
k1=aa1/np.array(org1)[:,1]
k2=aa2/np.array(org1)[:,1]
print k1
In [2100]:
aa1/aa2
Out[2100]:
In [2092]:
(np.log2(Ri1)-np.log2(Ki1)-np.log2(Ahat))/np.array(org1)[:,1]
Out[2092]:
In [2078]:
D1_snok/float(C1_snok)
Out[2078]:
In [2107]:
np.log2(aa1/(np.array(org1)[:,1]))-np.log2(aa2/(np.array(org1)[:,1]))
Out[2107]:
In [2102]:
(aa1-aa2)/(np.array(org1)[:,1])  # denominator left blank in the original
Out[2102]:
In [2043]:
np.log2(Ri1)-np.log2(Ki1)-np.log2(Ahat)
Out[2043]:
In [2044]:
np.array(org1)[:,2]
Out[2044]:
In [170]:
def null(a, rtol=1e-5):
    u, s, v = np.linalg.svd(a)
    rank = (s > rtol*s[0]).sum()
    return rank, v[rank:].T.copy()
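Usage sketch (my addition) on a rank-deficient matrix:
A = np.array([[1., 2.], [2., 4.]])  # rank 1
rank, ns = null(A)
print rank           # 1
print np.dot(A, ns)  # ~0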
In [594]:
def create_pairs(arr):
    # all unordered pairs (i < j) of elements of arr, via the cartesian product
    l = len(arr)
    arr = cartesian([arr,arr]).reshape((l,l,2))
    inde = np.triu_indices(l,k=1)
    return arr[inde]
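Usage sketch (my addition): create_pairs returns each unordered pair once:
print create_pairs([1, 2, 3])
# [[1 2]
#  [1 3]
#  [2 3]]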
In [74]:
l1=np.array([x.l for x in c.pop.values()])/np.mean([x.l for x in c.pop.values()])
In [75]:
arr1=cartesian([[x.C for x in c.pop.values()],[x.C for x in c.pop.values()]])
arr1=arr1.reshape((len(sel[0]),len(sel[0]),2))
arrC=arr1[inde]
In [160]:
def A_snok(R,K,l,cik):
    return ((K[1]*l[1])/float(R[1]))*(R[0]/(float(K[0])*l[0]))**(cik[1]/float(cik[0]))
In [212]:
1**.2
Out[212]:
In [68]:
print c.pop.keys()
print c.distribution
In [21]:
c=Community('comm0',[acc[s] for s in sel[0]],[growth_param[s] for s in sel[0]],td,'bowtie2')
In [708]:
c.write_reads()
In [731]:
c.collect()
In [487]:
len(c.samples[0][0])
Out[487]:
In [709]:
c.run_pipeline()
In [512]:
[res,hfit,pfit]=c.compare_fit()
In [481]:
100*10**5/float(c.pop['NC_000913.3'].l)
Out[481]:
In [2255]:
tot_reads = [10**6]  # np.linspace(5*10**3,10**4,15)
c = Community('comm0',[acc[s] for s in sel[0]],[growth_param[s] for s in sel[0]],td,'bowtie2')
c.build_index()
for nr in tot_reads:
    c.sample(nr)
    c.write_reads()
    c.run_pipeline()
    [res,hfit,pfit] = c.compare_fit()
In [448]:
np.log2(1.2978)
Out[448]:
In [614]:
c.distribution
Out[614]:
In [616]:
c.pop.keys()
Out[616]:
In [570]:
tot_reads[0]*100/float(c.pop['NC_000913.3'].l)
Out[570]:
In [343]:
c.write_reads()
In [1]:
pl=plt.plot(tot_reads[0:6],hfit,tot_reads[0:6],pfit)
ax=pl[0]
#pl[0].set_xscale('log')
#plt.legend(['p(x) fit','Linear fit'])
#plt.plot(tot_reads,pfit,label='Linear fit')
In [ ]:
ax.set_yscale('log')
In [270]:
[res,hfit,pfit]=c.compare_fit()
In [234]:
res.plot()
pfit
Out[234]:
In [177]:
start = time.time()
print("hello")
end = time.time()
print(end - start)
Out[177]:
In [106]:
community = []
nrs = np.linspace(5*10**3,10**6,10)
res = []
for i in range(9):
    community.append(run_community_analysis('smr',td,'bowtie2',acc,growth_param,comm,nrs[i],0))
    res.append(community[i][0].compare_fit())
In [ ]:
cp0=community[0].pop.values()
Gekv(cp0[0].B,cp0[0].C)
In [100]:
# PTRs
for i,c in enumerate(comm):
    print "comm"+str(i)
    for c_sub in c:
        print acc[c_sub]+": "+str(2**growth_param[c_sub][1])
In [30]:
cov1=2**(np.load(join(td,'comm1/Data/reads/npy',acc[comm[1][1]]+'.depth.npy')))
acc[comm[1][1]]
Out[30]:
In [58]:
#print c.pop[0].l/float(c.pop[1].l)*.8
#print c.pop[1].l/float(c.pop[0].l)*.2
c
Out[58]:
In [32]:
indd=1;
x=np.linspace(0,1,len(cov1))
plt.plot(x*c.pop[indd].l,cov1/(cov1.sum()/len(cov1)*c.pop[indd].l))
plt.plot(x*c.pop[indd].l,piecewise_prob(x*c.pop[indd].l,c.pop[indd].C,c.pop[indd].l))
plt.plot(x*c.pop[indd].l,res.best_fit)
In [24]:
signal=cov1/(cov1.sum()/len(cov1)*c.pop[indd].l)
res=fit_signal(signal,c.pop[indd].l)
In [25]:
err=(c.pop[indd].C-res.best_values['C'])/c.pop[indd].C
from_ptr=np.load(join(td,'comm1/Data/reads/npy',acc[comm[1][1]]+'.depth.best.npy'))
err2=(c.pop[indd].C-from_ptr[2]+from_ptr[3])/c.pop[indd].C
print err, err2
In [26]:
c.pop[indd].C
Out[26]:
In [45]:
conf=local_conf(join(td,'comm1'),'bowtie2')
In [23]:
import example
In [48]:
example.main(join(conf['ref_path'],'Fasta'),False,conf['data_path'],conf['node_path'],conf['output_path']+"/out.df",None,True)
In [38]:
conf['ref_path']
Out[38]:
In [111]:
refs=open_records(glob(join(conf['ref_path'],'Fasta','*.fasta')))
In [119]:
glob(join(conf['ref_path'],'Fasta','*.fasta'))
Out[119]:
In [112]:
refs
Out[112]:
In [57]:
import PTRC
In [94]:
import pickle
In [97]:
ind
Out[97]:
In [101]:
f = open('/mnt/index.pk','w')
pickle.dump(ind,f)
f.close()
In [86]:
f = open('/mnt/index.pk','r')
ind = pickle.load(f)
f.close()
In [93]:
ind=({'Bacteroides vulgatus': ['SegalLab|882']},{})
In [88]:
PTRC.coverage_analysis(args.i1, args.i2, args.m, args.pe, args.gz, \
args.outfol, args.qual_format, args.cov_thresh, dbpath = args.db_path_name)
In [23]:
r=glob(conf['data_path']+'/*')
In [32]:
args=namedtuple('args','i1 i2 m pe gz outfol qual_format cov_thresh db_path_name')
args.i1=r[0]
args.i2=r[1]
#args.m=None#conf['data_path']+"/blah.m"
args.pe=True
args.gz=False
args.outfol=conf['output_path']
args.qual_format='offset-33'
args.cov_thresh=5
args.db_path_name=None
In [2]:
cd /mnt
In [32]:
ro=pd.read_pickle('comm0/re.df')
In [37]:
ro['reads']
Out[37]:
In [27]:
import pandas
import pickle
In [32]:
cd /Users/hedani/Documents/Projects/MicrobialEcology/CommunitySampling
In [33]:
f=open('comm0/sim_db.pk')
pk=pickle.load(f)
In [34]:
pk
Out[34]:
In [32]:
mutable_seq
Out[32]:
In [67]:
mutated
Out[67]:
In [72]:
rec2=rec
rec2.seq=mutated
rec2.id="mut_"+rec2.id
rec2.description="mut_"+rec2.description
rec2.name="mut_"+rec2.name
In [66]:
mutated=mutateSeq(mutable_seq,.01)
In [71]:
with open("/Users/hedani/Documents/Projects/MicrobialEcology/CommunitySampling/comm0/References/Fasta/NC_000913.3_mutated.fasta", "w") as output_handle:
    SeqIO.write(rec2, output_handle, "fasta")
In [65]:
def mutateSeq(mutable_seq,rate):
    # substitute a fraction `rate` of positions with a random different base
    nr = ['A','C','G','T']
    nt = {'A': 0,'C': 1,'G': 2,'T': 3}
    prob = (np.matrix('0 1 1 1; 1 0 1 1; 1 1 0 1; 1 1 1 0')/float(3)).tolist()
    rand_ch = np.random.choice(range(len(mutable_seq)),int(round(len(mutable_seq)*rate)))
    for c in rand_ch:
        mutable_seq[c] = np.random.choice(nr,None,False,prob[nt[mutable_seq[c]]])
    return mutable_seq
In [46]:
IUPAC.unambiguous_dna.letters
Out[46]:
In [52]:
prob=np.matrix('0 1 1 1; 1 0 1 1; 1 1 0 1; 1 1 1 0')/float(3)
prob.tolist()
Out[52]:
In [29]:
# import trimmed multiple alignment for creating the MC
handle = open("/Users/Shared/ecol_maf2.fasta-gb", "rU")
seq = []
for rec in SeqIO.parse(handle, "fasta"):
    seq.append(rec.seq)
In [54]:
# TODO: per-column processing of the alignment (left unfinished)
#for i in range(len(seq[0])):
#    seq[i] =
Out[54]:
In [9]:
ls comm0/References/Fasta/
In [31]:
from Bio import SeqIO
from Bio.Seq import MutableSeq
from Bio.Alphabet import IUPAC

handle = open("/Users/hedani/Documents/Projects/MicrobialEcology/CommunitySampling/comm0/References/Fasta/NC_000913.3.fasta", "rU")
for rec in SeqIO.parse(handle, "fasta"):
    seq = rec.seq
    mutable_seq = MutableSeq(str(rec.seq), IUPAC.unambiguous_dna)
handle.close()
handle.close()
In [ ]:
for rec in SeqIO.parse("sequence.gb", "genbank"):
    if rec.features:
        for feature in rec.features:
            if feature.type == "CDS":
                print feature.location
                print feature.qualifiers["protein_id"]
                print feature.location.extract(rec).seq
In [ ]:
x=None
In [218]:
# Import relevant modules
import pymc
import numpy as np

# Data: sampled positions (scaled to unit genome length), first replichore only
observations = [x for x in samp/float(1000) if x < 1/float(2)]

# Dirichlet prior over the (B, C, D) mixture weights
probs = pymc.Dirichlet(name="theta",theta=[1/float(2)]*3)

# mixture components of the read-position density
def PB(x,Bb,Cc):
    return 2**(2*Cc*x)*(-1 + 2**Bb)

def PC(x,Cc):
    return (2**Cc - 2**(1 + 2*Cc*x) + 2**(Cc + 2*Cc*x))/2**Cc

def PD(x,Cc,Dd):
    return 2**(1 - Cc - Dd + 2*Cc*x)*(-1 + 2**Dd)

@pymc.stochastic(dtype=float,observed=True)
def P(probs=probs,value=observations):
    # pymc stochastics return log-probabilities, hence the log of the product
    theta = np.hstack((probs,1-probs.sum()))
    return np.sum(np.log(PB(value,theta[0],theta[1])+PC(value,theta[1])+PD(value,theta[1],theta[2])))

alpha = pymc.Uniform('alpha',lower=0, upper=1000)
beta = pymc.Uniform('beta',lower=0, upper=1000)
C = pymc.Beta('C',alpha=alpha,beta=beta)

@pymc.stochastic(dtype=float,observed=True)
def pn(C=C,beta=beta,value=observations):
    # log-likelihood of the observed positions under pxn
    return np.sum([np.log(pxn(x,C,1)) for x in value])

#model = pymc.Model([probs,P])
model = pymc.Model(pn)
M = pymc.MCMC([C,model])
M.use_step_method(pymc.AdaptiveMetropolis,C)
M.sample(iter=1500, burn=400, thin=2)
print "Maximum posterior probs = ", C.value
In [219]:
pymc.Matplot.plot(M)
In [92]:
nr_samples=10000
samp=inverse_transform_sampling(.6,nr_samples,1000)
In [208]:
observations=[x for x in samp/float(1000) if x < 1/float(2)]
[pxn(x,.6,1) for x in observations]
Out[208]:
In [205]:
pxn2(.5,.6)
Out[205]:
In [228]:
import pymc
import numpy as np

n = 5*np.ones(4,dtype=int)
x = np.array([-0.86,-0.3,-0.05,.73])
alpha = pymc.Normal('alpha',mu=0,tau=.01)
beta = pymc.Normal('beta',mu=0,tau=.01)

@pymc.deterministic
def theta(a=alpha, b=beta):
    """theta = logit^{-1}(a+b)"""
    return pymc.invlogit(a+b*x)

d = pymc.Binomial('d', n=n, p=theta, value=np.array([0.,1.,3.,5.]),
                  observed=True)
In [230]:
S = pymc.MCMC([theta],db='pickle')
S.sample(iter = 10000, burn = 5000, thin = 2)
pymc.Matplot.plot(S)
In [226]:
pymc.Matplot.plot(S)