In [1]:
# Load IPython's autoreload extension; mode 2 re-imports all modules before
# each cell execution, so edits to the local packages imported in this cell
# are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2
import warnings
# Silence every warning category for the whole kernel session.
# NOTE(review): this also hides deprecation and dtype warnings raised by the
# libraries used in this notebook — consider narrowing the filter.
warnings.filterwarnings('ignore')
import os.path as op
from collections import Counter
import pandas as pd
import numpy as np
from rdkit.Chem import AllChem as Chem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
# Global RDKit drawing options: font face and size of the atom labels.
# These are set on the DrawingOptions class, so they apply to every structure
# drawn afterwards in this kernel.
Draw.DrawingOptions.atomLabelFontFace = "DejaVu Sans"
Draw.DrawingOptions.atomLabelFontSize = 18
from misc_tools import nb_tools as nbt # , html_templates as html, apl_tools as apt
from rdkit_ipynb_tools import tools # , bokeh_tools as bt, pipeline as p, clustering as cl
from cellpainting import processing as cpp, tools as cpt, reporting as cpr
import ipywidgets as ipyw
from IPython.core.display import HTML, display, clear_output #, Javascript, display_png, clear_output, display
# Absolute path to a gzip-compressed TSV export (per the file name).
# Not referenced by the cells visible in this part of the notebook.
# NOTE(review): presumably the COMAS compound export with base64-encoded
# columns — confirm, and consider making the path configurable.
COMAS = "/home/pahl/comas/share/export_data_b64.tsv.gz"
In [4]:
# --- Run configuration -------------------------------------------------------
DATE = "170530"  # "170704", "170530"
PLATE = "SI0012"
CONF = "conf170511mpc"  # "conf170623mpc", "conf170511mpc"
QUADRANTS = [1]  # [1, 2, 3, 4]

# Optional side effects; all disabled by default.
WRITE_PKL = False
UPDATE_SIMILAR = False
UPDATE_DATASTORE = False

# --- Process each quadrant of the plate and build its report -----------------
for quadrant in QUADRANTS:
    SRC_DIR = "/home/pahl/comas/projects/painting/{}-{}-{}_{}".format(DATE, PLATE, quadrant, CONF)
    REPORTNAME = "report_{}-{}".format(PLATE, quadrant)  # alternative: "report"

    # Columns carried over from the full plate data set into the profile.
    keep = [
        "Compound_Id", "Container_Id", "Producer", "Conc_uM",
        "Activity", "Toxic", "Pure_Flag", "Rel_Cell_Count",
        'Act_Profile', "Metadata_Well", "Plate", 'Smiles',
    ]
    data_keep = list(keep)

    details_dir = op.join(REPORTNAME, "details")
    cpt.create_dirs(details_dir)
    print("\nProcessing plate {}_{}-{}_{} ...".format(DATE, PLATE, quadrant, CONF))

    # Load the raw results and run the processing steps in order.
    results_file = op.join(SRC_DIR, "Results.tsv")
    echo_xml_pattern = op.join(SRC_DIR, "*_print.xml")
    ds_plate = (
        cpp.load(results_file)
        .group_on_well()
        .remove_skipped_echo_direct_transfer(echo_xml_pattern)
        .well_type_from_position()
        .flag_toxic()
        .activity_profile()
        .join_layout_1536(PLATE, quadrant)
    )
    # The "Plate" column has to be set before the SMILES join, as in the
    # original statement order.
    ds_plate.data["Plate"] = "{}-{}-{}".format(DATE, PLATE, quadrant)
    ds_plate = ds_plate.join_smiles()

    ds_profile = ds_plate[keep]
    if UPDATE_SIMILAR:
        # write=False here; cpp.write_sim_refs() is called once after the loop.
        ds_profile.update_similar_refs(write=False)
    if WRITE_PKL:
        # The same file name is what cpp.load_pkl() reads in the report-only cell.
        ds_profile.write_pkl("{}-{}-{}_profile.pkl".format(DATE, PLATE, quadrant))

    # Report order: sort on "Toxic" ascending, then on "Activity" descending.
    ds_report = ds_profile.sort_values(["Toxic", "Activity"], ascending=[True, False])
    cpr.full_report(
        ds_report,
        SRC_DIR,
        report_name=REPORTNAME,
        plate="{}-{}".format(PLATE, quadrant),
        highlight=True,
    )

    if UPDATE_DATASTORE:
        # write=False here; cpp.write_datastore() is called once after the loop.
        ds_profile.update_datastore(mode="cpd", write=False)

# --- Write the collected updates once, after all quadrants are done ----------
if UPDATE_SIMILAR:
    cpp.write_sim_refs()
if UPDATE_DATASTORE:
    cpp.write_datastore()
In [ ]:
# Manual trigger for cpp.write_datastore(), the same call the plate-processing
# cell makes once after its loop when UPDATE_DATASTORE is True.
# NOTE(review): presumably this persists the datastore updates that were
# collected with write=False — confirm against cellpainting.processing.
cpp.write_datastore()
In [ ]:
# NOTE(review): presumably drops data cached inside cellpainting.processing so
# that it is re-read on next use — confirm in the module before relying on it.
cpp.clear_resources()
In [5]:
# --- Re-create the report from a previously written profile pickle -----------
# This cell reads "<DATE>-<PLATE>-<quadrant>_profile.pkl", which the
# plate-processing cell only writes when its WRITE_PKL flag is True.
DATE = "170530"  # "170704", "170530"
PLATE = "SI0012"
CONF = "conf170511mpc"  # "conf170623mpc", "conf170511mpc"
QUADRANTS = [1]  # [1, 2, 3, 4]

for quadrant in QUADRANTS:
    SRC_DIR = "/home/pahl/comas/projects/painting/{}-{}-{}_{}".format(DATE, PLATE, quadrant, CONF)
    REPORTNAME = "report_{}-{}".format(PLATE, quadrant)  # alternative: "report"

    details_dir = op.join(REPORTNAME, "details")
    cpt.create_dirs(details_dir)
    print("\nProcessing plate {}_{}-{}_{} ...".format(DATE, PLATE, quadrant, CONF))

    profile_pkl = "{}-{}-{}_profile.pkl".format(DATE, PLATE, quadrant)
    ds_profile = cpp.load_pkl(profile_pkl)

    # Report order: sort on "Toxic" ascending, then on "Activity" descending.
    ds_report = ds_profile.sort_values(["Toxic", "Activity"], ascending=[True, False])
    cpr.full_report(
        ds_report,
        SRC_DIR,
        report_name=REPORTNAME,
        plate="{}-{}".format(PLATE, quadrant),
        highlight=True,
    )
In [ ]:
# --- Build the reports for the reference plates ------------------------------
REF_DIR = "/home/pahl/comas/projects/painting/references"
PLATE_NAMES = ["S0195", "S0198", "S0203"]  # "S0195", "S0198", "S0203"
# Date prefix per reference plate; used to build the value matched against the
# "Plate" column ("<date>-<plate>-<quadrant>").
DATES = {"S0195": "170523", "S0198": "170516", "S0203": "170512"}
REPORTNAME = "references"
# Quadrant indices processed for every reference plate (1..4).
REF_QUADRANTS = range(1, 5)

cpt.create_dirs(op.join(REPORTNAME, "details"))
pb = nbt.ProgressbarJS()
ds_ref = cpp.load("references_act_prof.tsv")

# Derive the step count from the same range the inner loop iterates, so the
# progress percentage stays correct if the quadrant list changes.
num_steps = len(REF_QUADRANTS) * len(PLATE_NAMES)
step = 0
for plate in PLATE_NAMES:
    for idx in REF_QUADRANTS:
        step += 1
        pb.update(100 * step / num_steps)
        SRC_DIR = "{}/{}-{}".format(REF_DIR, plate, idx)
        print("\nProcessing plate {}-{} ...".format(plate, idx))

        # Select the rows of this plate quadrant from the combined reference set.
        ds_profile = ds_ref[ds_ref["Plate"] == "{}-{}-{}".format(DATES[plate], plate, idx)].copy()

        # Report order: sort on "Toxic" ascending, then on "Activity" descending.
        ds_report = ds_profile.sort_values(["Toxic", "Activity"], ascending=[True, False])

        # Fix: pass the sorted ds_report. The original passed the unsorted
        # ds_profile, which left ds_report unused and made the reference
        # reports inconsistent with the per-plate report cells.
        cpr.full_report(
            ds_report,
            SRC_DIR,
            report_name=REPORTNAME,
            plate="{}-{}".format(plate, idx),
            highlight=True,
            mode="ref",
        )
pb.done()
In [ ]: