In [1]:
import pymongo

In [2]:
from pymongo import MongoClient

In [3]:
client = MongoClient()

In [4]:
db = client["mightyscreen"]
db


Out[4]:
Database(MongoClient('localhost', 27017), u'mightyscreen')

In [5]:
db.collection_names()


Out[5]:
[u'system.indexes',
 u'library',
 u'compound',
 u'proj_data_1',
 u'fs.chunks',
 u'fs.files',
 u'file_document',
 u'view',
 u'proj_data_2',
 u'proj_data_3',
 u'proj_data_4',
 u'proj_data_5']

In [6]:
d = db['proj_data_3']

In [7]:
d.find_one()


Out[7]:
{u'_id': ObjectId('52fab0ccbe22a455e18255ea'),
 u'comment': [],
 u'create_by': 1,
 u'create_date': datetime.datetime(2014, 2, 13, 0, 10, 8, 346000),
 u'hit': 0,
 u'library': u'ICCB_HMS',
 u'plate': u'1922',
 u'platewell': u'1922A01',
 u'readout': {u'FI': [65875594.0, 63045433.0],
  u'FP': [146.4, 159.6],
  u'P_channel': [28866991.0, 27533920.0],
  u'S_channel': [8141612.0, 7977593.0]},
 u'score': {},
 u'well': u'A01',
 u'welltype': u'E'}

In [8]:
l = db["compound"]

In [9]:
l.find_one()


Out[9]:
{u'_id': ObjectId('52e7d0dabe22a46180f3aee6'),
 u'canonical_smiles': u'O=C(c1ccccc1I)NCCOCc1ccccc1\n',
 u'chemical_name': u'0',
 u'facility_reagent_id': u'ICCB-00111290',
 u'formula': u'C16H16INO2',
 u'fp2': u'00000200c024000000060001000003008000000100880000c0002000000004000040000c12000000080286200001004000000000000001000200000010000000020800080000140802010000000088424000800000201000000008400000030002001000000000000040c0000000009080010600001801000000480000000008',
 u'fp3': u'000000040001b030',
 u'fp4': u'0000800000000000008000000000000a000005400000200000000000000000004002000000002840000000000000000000000000000000000000000000000000',
 u'inchi': u'InChI=1/C16H16INO2/c17-15-9-5-4-8-14(15)16(19)18-10-11-20-12-13-6-2-1-3-7-13/h1-9H,10-12H2,(H,18,19)',
 u'inchikey': u'SXMBTNXWVVBMOR-UHFFFAOYSA-N\n',
 u'library_name': u'ICCB_HMS',
 u'logp': 3.63,
 u'molecular_weight': 381.21,
 u'plate': u'803',
 u'plate_well': u'803B15',
 u'pubchem_cid': u'2222845',
 u'sdf': u'Structure37473\n OpenBabel12091311333D\nStructure written by MMmdl.\n 20 21  0  0  1  0  0  0  0  0999 V2000\n   10.9148    2.0997    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n    9.6895    2.7776    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n   12.1396    2.7776    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n    9.7187    4.1998    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0\n    8.4649    2.0997    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0\n   12.1688    4.1998    0.0000 I   0  0  0  0  0  0  0  0  0  0  0  0\n   10.9148    0.6780    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n    2.4133    2.8359    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n    4.8194    2.7776    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0\n   13.3645    2.0997    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n    7.2692    2.7776    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n    3.6382    2.0997    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n    6.0444    2.0997    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n    2.4133    4.2581    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n    1.2102    2.1580    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n   12.1032    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n   13.3645    0.6780    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n    0.0000    2.8359    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n    1.2102    4.9360    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n    0.0000    4.2581    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n  1  2  1  0  0  0  0\n  1  3  2  0  0  0  0\n  1  7  1  0  0  0  0\n  2  4  2  0  0  0  0\n  2  5  1  0  0  0  0\n  3  6  1  0  0  0  0\n  3 10  1  0  0  0  0\n  5 11  1  0  0  0  0\n  7 16  2  0  0  0  0\n  8 12  1  0  0  0  0\n  8 14  1  0  0  0  0\n  8 15  2  0  0  0  0\n  9 13  1  0  0  0  0\n  9 12  1  0  0  0  0\n 10 17  2  0  0  0  0\n 11 13  1  0  0  0  0\n 14 19  2  0  0  0  0\n 15 18  1  0  0  0  0\n 16 17  1  0  0  0  0\n 18 20  2  0  0  0  0\n 19 20  1  0  0  0  0\nM  END\n>  <Library>\nChemBridge Microformats\n\n>  <Plate>\n803\n\n>  <Well>\nB15\n\n>  <Plate_Well>\n803B15\n\n>  <Facility_Reagent_ID>\nICCB-00111290\n\n>  <PubChem_CID>\n2222845\n\n>  <Smiles>\nc1(ccccc1I)C(NCCOCc2ccccc2)=O\n\n>  <InChI>\nInChI=1/C16H16INO2/c17-15-9-5-4-8-14(15)16(19)18-10-11-20-12-13-6-2-1-3-7-13/h1-9H,10-12H2,(H,18,19)\n\n>  <Molecular_Weight>\n381.20821\n\n>  <Formula>\nC16H16INO2\n\n>  <Fp2>\n00000200c024000000060001000003008000000100880000c0002000000004000040000c12000000080286200001004000000000000001000200000010000000020800080000140802010000000088424000800000201000000008400000030002001000000000000040c0000000009080010600001801000000480000000008\n\n>  <Fp3>\n000000040001b030\n\n>  <Fp4>\n0000800000000000008000000000000a000005400000200000000000000000004002000000002840000000000000000000000000000000000000000000000000\n\n>  <TPSA>\n38.33\n\n>  <logP>\n3.6287\n\n>  <Canonical_Smiles>\nO=C(c1ccccc1I)NCCOCc1ccccc1\n\n\n>  <InChiKey>\nSXMBTNXWVVBMOR-UHFFFAOYSA-N\n\n\n$$$$\n',
 u'sub_library_name': u'ChemBridge Microformats',
 u'svg': u'<?xml version="1.0"?>\n<svg version="1.1" id="topsvg"\nxmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"\nxmlns:cml="http://www.xml-cml.org/schema" x="0" y="0" width="200px" height="200px" viewBox="0 0 100 100">\n<title>OBDepict</title>\n<rect x="0" y="0" width="100" height="100" fill="white"/>\n<text text-anchor="middle" font-size="6" fill ="black" font-family="sans-serif"\nx="50" y="98" >Structure37473</text>\n<g transform="translate(0,0)">\n<svg width="100" height="100" x="0" y="0" viewBox="0 0 461.395 220.863"\nfont-family="sans-serif" stroke="rgb(0,0,0)" stroke-width="2"  stroke-linecap="round">\n<line x1="351.5" y1="120.9" x2="316.5" y2="101.6" stroke="rgb(0,0,0)"  stroke-width="2.0"/>\n<line x1="313.5" y1="101.5" x2="314.1" y2="73.9" stroke="rgb(0,0,0)"  stroke-width="2.0"/>\n<line x1="319.5" y1="101.7" x2="320.1" y2="74.1" stroke="rgb(0,0,0)"  stroke-width="2.0"/>\n<line x1="316.5" y1="101.6" x2="292.9" y2="114.6" stroke="rgb(0,0,0)"  stroke-width="2.0"/>\n<line x1="386.4" y1="101.6" x2="387.0" y2="74.0" stroke="rgb(0,0,0)"  stroke-width="2.0"/>\n<line x1="270.3" y1="114.5" x2="247.4" y2="101.6" stroke="rgb(0,0,0)"  stroke-width="2.0"/>\n<line x1="108.9" y1="99.9" x2="143.8" y2="120.9" stroke="rgb(0,0,0)"  stroke-width="2.0"/>\n<line x1="188.9" y1="107.9" x2="212.5" y2="120.9" stroke="rgb(0,0,0)"  stroke-width="2.0"/>\n<line x1="166.3" y1="108.1" x2="143.8" y2="120.9" stroke="rgb(0,0,0)"  stroke-width="2.0"/>\n<line x1="247.4" y1="101.6" x2="212.5" y2="120.9" stroke="rgb(0,0,0)"  stroke-width="2.0"/>\n<line x1="351.5" y1="120.9" x2="351.5" y2="161.5" stroke="rgb(0,0,0)"  stroke-width="2.0"/>\n<line x1="351.5" y1="161.5" x2="385.4" y2="180.9" stroke="rgb(0,0,0)"  stroke-width="2.0"/>\n<line x1="360.3" y1="158.2" x2="383.8" y2="171.6" stroke="rgb(0,0,0)"  stroke-width="2.0"/>\n<line x1="385.4" y1="180.9" x2="421.4" y2="161.5" stroke="rgb(0,0,0)"  stroke-width="2.0"/>\n<line x1="421.4" y1="161.5" x2="421.4" y2="120.9" stroke="rgb(0,0,0)"  stroke-width="2.0"/>\n<line x1="414.2" y1="155.5" x2="414.2" y2="126.9" stroke="rgb(0,0,0)"  stroke-width="2.0"/>\n<line x1="421.4" y1="120.9" x2="386.4" y2="101.6" stroke="rgb(0,0,0)"  stroke-width="2.0"/>\n<line x1="386.4" y1="101.6" x2="351.5" y2="120.9" stroke="rgb(0,0,0)"  stroke-width="2.0"/>\n<line x1="384.7" y1="110.8" x2="360.2" y2="124.3" stroke="rgb(0,0,0)"  stroke-width="2.0"/>\n<line x1="108.9" y1="99.9" x2="74.5" y2="119.3" stroke="rgb(0,0,0)"  stroke-width="2.0"/>\n<line x1="100.1" y1="96.6" x2="76.2" y2="110.1" stroke="rgb(0,0,0)"  stroke-width="2.0"/>\n<line x1="74.5" y1="119.3" x2="40.0" y2="99.9" stroke="rgb(0,0,0)"  stroke-width="2.0"/>\n<line x1="40.0" y1="99.9" x2="40.0" y2="59.3" stroke="rgb(0,0,0)"  stroke-width="2.0"/>\n<line x1="47.2" y1="93.9" x2="47.2" y2="65.3" stroke="rgb(0,0,0)"  stroke-width="2.0"/>\n<line x1="40.0" y1="59.3" x2="74.5" y2="40.0" stroke="rgb(0,0,0)"  stroke-width="2.0"/>\n<line x1="74.5" y1="40.0" x2="108.9" y2="59.3" stroke="rgb(0,0,0)"  stroke-width="2.0"/>\n<line x1="76.2" y1="49.2" x2="100.1" y2="62.7" stroke="rgb(0,0,0)"  stroke-width="2.0"/>\n<line x1="108.9" y1="59.3" x2="108.9" y2="99.9" stroke="rgb(0,0,0)"  stroke-width="2.0"/>\n<text x="311.351784" y="69.009640" fill="rgb(255,12,12)"  stroke="rgb(255,12,12)" stroke-width="1" font-size="16" >O</text>\n<text x="275.570901" y="128.942190" fill="rgb(12,12,255)"  stroke="rgb(12,12,255)" stroke-width="1" font-size="16" >N</text>\n<text x="275.570901" y="144.942190" fill="rgb(12,12,255)"  stroke="rgb(12,12,255)" stroke-width="1" font-size="16" >H</text>\n<text x="381.272618" y="69.009640" fill="rgb(147,0,147)"  stroke="rgb(147,0,147)" stroke-width="1" font-size="16" >I</text>\n<text x="171.535801" y="109.596313" fill="rgb(255,12,12)"  stroke="rgb(255,12,12)" stroke-width="1" font-size="16" >O</text>\n</svg>\n</g>\n</svg>\n\n',
 u'tpsa': 38.33,
 u'well': u'B15'}

In [10]:
import pandas as pd
import numpy as np

In [11]:
d1 = pd.DataFrame(list(d.find()))

In [12]:
d1.shape


Out[12]:
(768, 13)

In [13]:
d1.columns


Out[13]:
Index([u'_id', u'comment', u'compound', u'create_by', u'create_date', u'hit', u'library', u'plate', u'platewell', u'readout', u'score', u'well', u'welltype'], dtype='object')

In [14]:
l.find_one({"plate_well":"1922A03"})['fp2']


Out[14]:
u'040200008004000000060000010000000000001000000000c000020000002000004000042000000008028700400208000001000000000000030000001000000000000010201001400000000102018804400088000400000400000860140000000000100008000000000000000000000080010600000001000000081000008002'

In [15]:
d1['fp2'] = d1['platewell'].map(lambda x: "" if not l.find_one({"plate_well":x}) else l.find_one({"plate_well":x})['fp2'])

In [16]:
d1.head()['fp2']


Out[16]:
0                                                     
1                                                     
2    0402000080040000000600000100000000000010000000...
3    0000060000004000000600030c00000000000000000000...
4    0001000000000000000620000000000800000004000000...
Name: fp2, dtype: object

In [17]:
for i in "fp2 fp3 fp4 logp molecular_weight inchikey svg formula sub_library_name chemical_name".split():
    d1[i] = d1['platewell'].map(lambda x: "" if not l.find_one({"plate_well":x}) else l.find_one({"plate_well":x})[i])

In [22]:
d1.head()["fp2 fp3 fp4 logp molecular_weight inchikey svg formula sub_library_name chemical_name fpA fpB fiA fiB".split()]


Out[22]:
fp2 fp3 fp4 logp molecular_weight inchikey svg formula sub_library_name chemical_name fpA fpB fiA fiB
0 146.4 159.6 65875594 63045433
1 169.6 184.2 55839081 54349162
2 0402000080040000000600000100000000000010000000... 000000070009f000 0000000000000000008000000000000600000540080004... 2.57 200.58 GFGSZUNNBQXGMK-UHFFFAOYSA-N\n <?xml version="1.0"?>\n<svg version="1.1" id="... C7H5ClN2O3 NINDS Custom Collection 2 Aklomide 114.5 132.0 73239803 68441645
3 0000060000004000000600030c00000000000000000000... 1c0000040001b000 0240180300000080008000000000001200000000000000... 3.24 467.64 FTLDJPRFCGDUFH-UHFFFAOYSA-N\n <?xml version="1.0"?>\n<svg version="1.1" id="... C28H41N3O3 NINDS Custom Collection 2 Oxethazaine 176.5 192.0 52843942 51216050
4 0001000000000000000620000000000800000004000000... 000000000021b000 080000000000000000000000000000000020000000d000... 1.31 100.14 RAIPHJJURHTUIC-UHFFFAOYSA-N\n <?xml version="1.0"?>\n<svg version="1.1" id="... C3H4N2S NINDS Custom Collection 2 Aminothiazole 150.2 158.9 69225335 69672408

5 rows × 14 columns


In [20]:
d1['fpA'] = d1['readout'].map(lambda x: x['FP'][0]) 
d1['fpB'] = d1['readout'].map(lambda x: x['FP'][1]) 
d1['fiA'] = d1['readout'].map(lambda x: x['FI'][0]) 
d1['fiB'] = d1['readout'].map(lambda x: x['FI'][1])

In [23]:
d1.to_csv("demo_data.csv")