In [25]:
%load_ext autoreload
%autoreload 2
%load_ext sql
import xlwt
import pandas as pd
import bio.hts.apredica as apr
from bio.hts.htsdb import *
HCI_DIR = '/share/server_data/project/HCI/HepRn-1/'
DAT_DIR = HCI_DIR+'data/'
INC_DIR = DAT_DIR+'incoming/'
mng.register_connection("hts-db","htsdb",username="ishah",
password="ishah",host='localhost')
In [27]:
HtsPlate.objects.count()
Out[27]:
In [12]:
!ls $DAT_DIR
In [13]:
# FIles created in Apredica Oct 2013
R_HepRn = pd.read_csv(DAT_DIR+'apr-heprn-sep-2013.csv')
In [14]:
R_HepRn.head()
Out[14]:
In [17]:
print 'HepRn',R_HepRn.phase.unique()
print 'Times',R_HepRn.time.unique()
print 'Assays',R_HepRn.feature.unique()
In [18]:
#A=True
#if A:
# HtsChem.drop_collection()
# HtsExp.drop_collection()
# HtsAssay.drop_collection()
# HtsWell.drop_collection()
# HtsPlate.drop_collection()
# HtsConcRespCurveNrm.drop_collection()
# HtsAssayResult.drop_collection()
# HtsAnalysisMethod.drop_collection()
# HtsAnalysisStep.drop_collection()
In [11]:
Exp=apr.createAprExperiment(name='Apredica High Content Imaging in Rat primary hepatocytes (Phase I)',
org ='rat',cell='HepRn',tags=['HCS','PhI'],
eid='APR-HepRn-PhI')
apr.storeAprExperiment(R_HepRn,Exp,dbg=True)
In [13]:
# update chemicals
HtsChem.objects.update(set__ctrl=0)
CTRL = pd.read_csv(DAT_DIR+'apr-ctrl.csv')
for x in CTRL.to_records():
print x['chemical_name']
HtsChem.objects(name__iexact=x['chemical_name']).update(set__ctrl=x['ctrl'])
for HC in HtsChem.objects(chid__exists=0):
C = Chemical.objects(Q(casrn=HC.casrn) | Q(name__iexact=HC.name)).first()
if C:
print 'found',HC.name
HC.chid=C.id
HC.save()
else:
print 'not found',HC.name
In [14]:
# update treatment duration of
for P1 in HtsPlate.objects:
TH = set([i.timeh for i in P1.wells])
if len(TH)>1:
print 'Multiple times:',P1.eid
P1.timeh=TH.pop()
P1.save()
In [287]:
CHID = [i.id for i in Chemical.objects(tags='tx-pharma')]
HtsChem.objects(ctrl=0,chid__in=CHID).count()
CASRN=map(str,Chemical.objects(tags='tx-pharma').distinct('casrn'))
HtsChem.objects(ctrl=0,casrn__in=CASRN).count()
#CASRN[:10]
Out[287]:
In [29]:
# Create the analysis method
Steps = [HtsAnalysisStep(step=1,name='average-conc-reps',pkg='bio.hts.apredica',fun='analyzeHtsPlateConcResp',desc='Average assay response across replicate wells'),
HtsAnalysisStep(step=2,name='-conc-response',pkg='bio.hts.apredica',fun='analyzeHtsPlateConcResp',desc='Smooth the assay response at each concentration using a moving window and weights (convolution)',
params=[HtsParam(name='window_len',value=7),HtsParam(name='smoother',value='np.hamming')]),
HtsAnalysisStep(step=3,name='smooth-conc-response',pkg='bio.hts.apredica',fun='analyzeHtsPlateConcResp',desc='Smooth the assay response at each concentration using a moving window and weights (convolution)',
params=[HtsParam(name='window_len',value=7),HtsParam(name='smoother',value='np.hamming')]),
HtsAnalysisStep(step=4,name='normalize-by-plate-median',pkg='bio.hts.apredica',fun='analyzeHtsPlateConcResp',desc='Normalize by median response of assay across non-control wells report as log2(FC)',
params=[HtsParam(name='normby',value='median')]),
HtsAnalysisStep(step=5,name='calc-pct-resp',pkg='bio.hts.apredica',fun='analyzeHtsPlateConcResp',desc='Calculate the percentage change. Min=treated.quantile(0.02) and Max=posctrl.quantile(0.98)',
params=[HtsParam(name='pct_qb',desc='min_resp_quantile',value=0.02),HtsParam(name='pct_qt',desc='max_resp_quantile',value=0.98)]),
HtsAnalysisStep(step=6,name='calc-zscore-resp',pkg='bio.hts.apredica',fun='analyzeHtsPlateConcResp',desc='Calculate the z-score using the median and std of assay response on plate')
]
S2 = []
for S in Steps: S2.append(S.save())
CR_meth=HtsAnalysisMethod(name='plate-window-smoothing-median-norm-pct',desc="Process Apredica plate: (a) Hamming smoother with window-length=7, (b) Median normalization, (c) Percentage change calculation ",
steps=S2)
CR_meth.save()
Out[29]:
In [30]:
CR_meth=HtsAnalysisMethod.objects(name='plate-window-smoothing-median-norm-pct').first()
In [24]:
# Generate Conc Resp Points
HtsConcRespCurveNrm.drop_collection()
CR_meth=HtsAnalysisMethod.objects(name='plate-window-smoothing-median-norm-pct').first()
N = HtsPlate.objects.count()
for i,pid in enumerate(HtsPlate.objects.distinct('eid')):
print '%10s %3d/%d' %(pid,i,N)
apr.storeHtsPlateConcResp(pid,meth=CR_meth,window_len=7,smoother=np.hanning,dbg=False)
In [36]:
# Create the analysis method
# tol0=1e-4,lfc0=0.2,mit0=200,roots='fsolve',
Steps = [HtsAnalysisStep(step=1,name='interp-spline-conc-resp',pkg='bio.hts.apredica',fun='analyzeChemConcResp',
desc='Use a spline to interpolate response values: fold change (FC) and z-score (ZS)',
params=[HtsParam(name='spline_smooth',value=0)]),
HtsAnalysisStep(step=2,name='calc-conc-prod-sig-effect',pkg='bio.hts.apredica',fun='analyzeChemConcResp',
desc='Calculate the LEC as the minimum concentration at which the z-score reaches Z0 and abs(log2(FC)) > lfc0.' +
'Solve for the root(s) of the interpolated response i.e. ZS-Z0=0. In case of multiple roots pick lowest conc',
params=[HtsParam(name='Z0',value=1,desc='The z-score threshold for significance'),
HtsParam(name='lfc0',value=0.2,desc='The log2(FC) threshold for significance (used in conjunction with Z0'),
HtsParam(name='roots',value='fsolve',desc='Root finding function from scipy.optimize')]),
HtsAnalysisStep(step=3,name='calc-potency',pkg='bio.hts.apredica',fun='analyzeChemConcResp',
desc='Calculate the maximum lfc (top)')
]
S2 = []
for S in Steps: S2.append(S.save())
CR_hit_meth=HtsAnalysisMethod(name='apr-calc-chem-assay-plate-lec-1',desc="Calculate the LEC from APR concentration response curves by deviation from background.",
steps=S2)
CR_hit_meth.save()
Out[36]:
In [2]:
CR_hit_meth=HtsAnalysisMethod.objects(name__iexact='apr-calc-chem-assay-plate-lec-1').first()
#CR_hit_meth.delete()
CR_hit_meth
Out[2]:
In [3]:
import bio.hts.apredica as apr
from bio.hts.htsdb import *
HtsAssayResult.drop_collection()
N = HtsConcRespCurveNrm.objects.count()
for k,Crc in enumerate(HtsConcRespCurveNrm.objects):
if k and k % 300==0:
print '%4d/%4d' % (k,N)
#if HtsAssayResult.objects(crc=Crc): continue
try:
apr.storeChemConcResp(Crc,llec_max=6.0,lec_max=1e6,analysis_meth=CR_hit_meth,Z0=1,lfc0=0.2,dbg=False)
except:
print "Failed: ",Crc.assay.name,Crc.chem.name,Crc.plate.eid
In [222]:
Cas_cnt = dict( ((c,HtsChem.objects(casrn=c).count()) for c in HtsChem.objects.distinct('casrn') ))
REPS = [(HtsChem.objects(casrn=c).first().name,c,n,HtsChem.objects(casrn=c).distinct('eid')) for c,n in Cas_cnt.iteritems() if n>1]
[(HtsChem.objects(casrn=c).first().name,c,n) for c,n in Cas_cnt.iteritems() if n>1]
Out[222]:
In [297]:
#PX = analyzeHtsPlateConcResp('071015TP37SP2-72',window_len=7,smoother=np.bartlett,dbg=True)
#set([len(i.resp) for i in HtsConcRespCurveNrm.objects])
#print cr.lconc,cr.ppct,cr.slfc
#HtsConcRespCurveNrm.objects[20].plate.eid
#PX = analyzeHtsPlateConcResp(HtsConcRespCurveNrm.objects[20].plate.eid,window_len=7,smoother=np.bartlett,dbg=True)
E1 = HtsExp.objects[0]
CRC = HtsConcRespCurveNrm.objects(plate__in=HtsPlate.objects(exp=E1)).first()
Out[297]:
In [340]:
#DZS1(LC1[:10])
nc
Out[340]:
In [48]:
HtsAssayResult.objects(chem=HtsChem.objects(name__icontains='bisphenol').first()).count()
for AR in HtsAssayResult.objects(chem__in=HtsChem.objects(name__icontains='glitazone')):
lec = next(i for i in AR.res if i.name=='lec')
if lec.value>1e5: continue
eff = next(i for i in AR.res if i.name=='efficacy')
print AR.chem.name,AR.chem.eid,AR.crc.plate.exp.cell,AR.crc.plate.timeh,AR.assay.name,lec.value,lec.units,eff.value
In [28]:
#HtsConcRespCurveNrm.objects(assay__in=(HtsAssay.objects(name__icontains='CellLoss')),plate__in=(HtsPlate.objects(timeh=24))).count()
#set([i.chem.name for i in HtsAssayResult.objects((Q(res__name='llec') & Q(res__value__lt=-7)))])
#HtsAssayResult.objects(__raw__={'chem.name':{'$regex':'phthalate','$options':'i'}}).count()
HtsAssayResult.objects(__raw__={'res':{'$elemMatch':{'name':'llec','value':{'$lt':-7}}}},).count()
Out[28]:
In [68]:
D1,T1,C1 = getPlateData('071015TP37SP2-72')
D1.ctrl.unique()
T1.groupby(level=1).median()
FC1 = T1/T1.quantile(q=0.5)
FC1.hist()
Out[68]:
In [22]:
P1 = HtsPlate.objects(eid='01S2T24P0299').first()
W1 = P1.wells[100].chem
W1._data
HtsChem.objects(ctrl=None).count()
Out[22]:
In [18]:
# Create ToxCast APR assay 'parent' for the feature irrespective of direction / time.
import copy
for A in Assay.objects(tech__iexact='apr'):
A1 = copy.deepcopy(A)
A1.id=None
A1.eid = re.sub('_\d+.*','',A1.eid)
A1.name= re.sub('\s+\d+.*','',A1.name)
if not Assay.objects(name=A1.name,eid=A1.eid):
print ' Saving:',A1.eid,A1.name
A1.save()
In [20]:
for A in Assay.objects(tech__iexact='apr'):
print A.eid