In [1]:
%reload_ext autoreload
%autoreload 2
import warnings
warnings.filterwarnings('ignore')

import os.path as op
from collections import Counter

import pandas as pd
import numpy as np

from rdkit.Chem import AllChem as Chem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
Draw.DrawingOptions.atomLabelFontFace = "DejaVu Sans"
Draw.DrawingOptions.atomLabelFontSize = 18

from misc_tools import nb_tools as nbt  # , html_templates as html, apl_tools as apt
from rdkit_ipynb_tools import tools  # , bokeh_tools as bt, pipeline as p, clustering as cl

from cellpainting import processing as cpp, tools as cpt, reporting as cpr

import ipywidgets as ipyw
from IPython.core.display import HTML, display, clear_output  #, Javascript, display_png, clear_output, display

COMAS = "/home/pahl/comas/share/export_data_b64.tsv.gz"


> interactive IPython session.
Loading BokehJS ...
misc_tools.apl_tools                          (commit: fb5de1f ( 2017-06-23 13:16:39 ))
rdkit_ipynb_tools.tools                       (commit: 2321c7a ( 2017-07-10 22:15:00 ))
- no local installation of JSME found, using web version.
> loaded Nim extension.
cellpainting.processing                       (commit: ed8df03 ( 2017-08-07 15:34:08 ))
cellpainting.reporting                        (commit: ed8df03 ( 2017-08-07 15:34:08 ))

Analyze and Report Current Plate

Analyze and report a Cell Painting screening plate in 384-well format.


In [4]:
# --- Plate selection -------------------------------------------------------
DATE             = "170530"  # "170704", "170530"
PLATE            = "SI0012"
CONF             = "conf170511mpc"  # "conf170623mpc", "conf170511mpc"
QUADRANTS        = [1]  # [1, 2, 3, 4]

# --- Optional side effects (all disabled by default) -----------------------
WRITE_PKL        = False  # write the profile dataset to a pickle per quadrant
UPDATE_SIMILAR   = False  # update the similar references, written once after the loop
UPDATE_DATASTORE = False  # update the datastore, written once after the loop

# Columns kept for the profile / report dataset.
# The list does not depend on the quadrant, so it is defined once here
# instead of being rebuilt on every loop iteration.
keep = ["Compound_Id", "Container_Id", "Producer", "Conc_uM", "Activity", "Toxic", "Pure_Flag", "Rel_Cell_Count",
        'Act_Profile', "Metadata_Well", "Plate", 'Smiles']

for quadrant in QUADRANTS:
    SRC_DIR    = "/home/pahl/comas/projects/painting/{}-{}-{}_{}".format(DATE, PLATE, quadrant, CONF)
    REPORTNAME = "report_{}-{}".format(PLATE, quadrant)
    # REPORTNAME = "report"

    cpt.create_dirs(op.join(REPORTNAME, "details"))

    print("\nProcessing plate {}_{}-{}_{} ...".format(DATE, PLATE, quadrant, CONF))

    # Load the raw results and reduce them step by step; each step logs the
    # resulting dataset shape (see the cell output).
    ds_plate = cpp.load(op.join(SRC_DIR, "Results.tsv"))
    ds_plate = ds_plate.group_on_well()
    ds_plate = ds_plate.remove_skipped_echo_direct_transfer(op.join(SRC_DIR, "*_print.xml"))
    ds_plate = ds_plate.well_type_from_position()

    ds_plate = ds_plate.flag_toxic()
    ds_plate = ds_plate.activity_profile()
    ds_plate = ds_plate.join_layout_1536(PLATE, quadrant)
    # Tag every row with the plate identifier (date-plate-quadrant).
    ds_plate.data["Plate"] = "{}-{}-{}".format(DATE, PLATE, quadrant)

    ds_plate = ds_plate.join_smiles()
    ds_profile = ds_plate[keep]
    if UPDATE_SIMILAR:
        # write=False: the actual write happens once, after all quadrants.
        ds_profile.update_similar_refs(write=False)

    if WRITE_PKL:
        ds_profile.write_pkl("{}-{}-{}_profile.pkl".format(DATE, PLATE, quadrant))
    # ds_profile = cpp.load_pkl("{}-{}-{}_profile.pkl".format(DATE, PLATE, quadrant))

    # Sort "Toxic" ascending, then "Activity" descending.
    ds_report = ds_profile.sort_values(["Toxic", "Activity"], ascending=[True, False])
    # ds_report = ds_profile.remove_toxic()[0].sort_values("Activity", ascending = False)
    # ds_report.data = ds_report.data.head(10)

    cpr.full_report(ds_report, SRC_DIR, report_name=REPORTNAME,
                    plate="{}-{}".format(PLATE, quadrant), highlight=True)

    if UPDATE_DATASTORE:
        # write=False: the actual write happens once, after all quadrants.
        ds_profile.update_datastore(mode="cpd", write=False)

# Write the updates from the loop once, after all quadrants are processed.
if UPDATE_SIMILAR:
    cpp.write_sim_refs()
if UPDATE_DATASTORE:
    cpp.write_datastore()


Processing plate 170530_SI0012-1_conf170511mpc ...
* load dataset:          ( 3456 | 1937)
* group on well:         (  384 | 1937)
* remove skipped:        (  384 | 1937)    (  0 skipped ())
* well type from pos:    (  384 | 1938)
* flag toxic:            (  384 | 1940)    ( 11 flagged)
* activity profile:      (  384 |    9)
* join layout 1536:      (  352 |   13)
* join smiles:           (  352 |   16)
* subset:                (  352 |   12)
* sort_values:           (  352 |   12)

In [ ]:
# Manual datastore write. The analysis cell above only calls this when
# UPDATE_DATASTORE is True (after `update_datastore(..., write=False)`);
# presumably this persists those pending updates to disk — TODO confirm.
cpp.write_datastore()

In [ ]:
# Presumably drops the resources that `cellpainting` loads lazily (the report
# log shows "loading resource: (SIM_REFS)" / "(REFERENCES)") so that they are
# re-read on next use — TODO confirm against cellpainting.processing.
cpp.clear_resources()

Report Current Plate with Existing Data

Report a Cell Painting screening plate in 384-well format from pre-generated data. This requires the profile pickle that the analysis cell above writes when `WRITE_PKL = True`.


In [5]:
# Which plate to report; must match a profile pickle that already exists in
# the working directory.
DATE       = "170530"  # "170704", "170530"
PLATE      = "SI0012"
CONF       = "conf170511mpc"  # "conf170623mpc", "conf170511mpc"
QUADRANTS  = [1]  # [1, 2, 3, 4]

for quadrant in QUADRANTS:
    # Names derived from the plate selection above.
    plate_label = "{}-{}".format(PLATE, quadrant)
    pkl_name    = "{}-{}-{}_profile.pkl".format(DATE, PLATE, quadrant)
    REPORTNAME  = "report_{}-{}".format(PLATE, quadrant)
    # REPORTNAME = "report"
    SRC_DIR     = "/home/pahl/comas/projects/painting/{}-{}-{}_{}".format(DATE, PLATE, quadrant, CONF)

    details_dir = op.join(REPORTNAME, "details")
    cpt.create_dirs(details_dir)

    print("\nProcessing plate {}_{}-{}_{} ...".format(DATE, PLATE, quadrant, CONF))

    # Only load pickles written by this notebook; do not load untrusted files.
    ds_profile = cpp.load_pkl(pkl_name)

    # Sort "Toxic" ascending, then "Activity" descending.
    sort_columns = ["Toxic", "Activity"]
    ds_report = ds_profile.sort_values(sort_columns, ascending=[True, False])
    # ds_report = ds_profile.remove_toxic()[0].sort_values("Activity", ascending = False)
    # ds_report.data = ds_report.data.head(10)

    cpr.full_report(
        ds_report,
        SRC_DIR,
        report_name=REPORTNAME,
        plate=plate_label,
        highlight=True,
    )


cellpainting.processing                       (commit: 7e77cd2 ( 2017-08-05 22:51:25 ))
cellpainting.reporting                        (commit: 7e77cd2 ( 2017-08-05 22:51:25 ))

Processing plate 170530_SI0012-1_conf170511mpc ...
* load pickle:           (  352 |   12)
* sort_values:           (  352 |   12)
* creating overview...
- loading resource:                        (SIM_REFS)
* creating detailed reports...
  * loading control images...
  * writing individual reports...
- loading resource:                        (REFERENCES)
* done.

Reference Plates


In [ ]:
# Build reports for the reference plates, one per (plate, quadrant) pair.
REF_DIR     = "/home/pahl/comas/projects/painting/references"
PLATE_NAMES = ["S0195", "S0198", "S0203"]  # "S0195", "S0198", "S0203"
DATES       = {"S0195": "170523", "S0198": "170516", "S0203": "170512"}  # plate name -> date
REPORTNAME  = "references"

cpt.create_dirs(op.join(REPORTNAME, "details"))

pb = nbt.ProgressbarJS()
ds_ref = cpp.load("references_act_prof.tsv")
# Four quadrants (1..4) are reported per plate; see the inner loop.
num_steps = 4 * len(PLATE_NAMES)
step = 0
for plate in PLATE_NAMES:
    for idx in range(1, 5):
        step += 1
        pb.update(100 * step / num_steps)
        SRC_DIR = "{}/{}-{}".format(REF_DIR, plate, idx)
        print("\nProcessing plate {}-{} ...".format(plate, idx))
        # Select this quadrant's rows via the "Plate" column (date-plate-quadrant).
        ds_profile = ds_ref[ds_ref["Plate"] == "{}-{}-{}".format(DATES[plate], plate, idx)].copy()

        ds_report = ds_profile.sort_values(["Toxic", "Activity"], ascending=[True, False])
        # Fix: pass the sorted `ds_report`, as the other report cells do.
        # Previously the unsorted `ds_profile` was passed and the sort was unused.
        cpr.full_report(ds_report, SRC_DIR, report_name=REPORTNAME,
                        plate="{}-{}".format(plate, idx), highlight=True, mode="ref")
pb.done()

In [ ]: