The attribute is the InChI string. I'm writing the updated tree files to a new folder, "pactolus_trees_with_inchi", which is not readable by openmsi users but is writable by us.
When you are happy with the result:
chgrp -R m1541 "foldername"
chmod -R 750 "foldername"
so the users can have read access to the updated tree files
In [1]:
# Notebook setup: configure inline plotting, extend the library and module
# search paths, then import everything the later cells rely on.
# %matplotlib notebook
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
from matplotlib import pylab as plt
import sys
import glob, os
# Point LD_LIBRARY_PATH at the shared boost and jupyterhub library folders.
# curr_ld_lib_path is empty here, so the resulting value starts with ':'.
# NOTE(review): changing LD_LIBRARY_PATH from inside an already-running
# interpreter usually only affects child processes -- confirm this has the
# intended effect on the imports below.
curr_ld_lib_path = ''
os.environ['LD_LIBRARY_PATH'] = curr_ld_lib_path + ':/project/projectdirs/openmsi/jupyterhub_libs/boost_1_55_0/lib' + ':/project/projectdirs/openmsi/jupyterhub_libs/lib'
import sys
# sys.path.remove('/anaconda/lib/python2.7/site-packages')
# The jupyterhub site-packages folder is appended (lowest priority), while the
# pactolus source folder is inserted at the front (highest priority).
sys.path.append('/global/project/projectdirs/openmsi/jupyterhub_libs/anaconda/lib/python2.7/site-packages')
sys.path.insert(0,'/project/projectdirs/openmsi/projects/meta-iq/pactolus/pactolus' )
# NOTE(review): later cells use `np`, but numpy is never imported explicitly in
# this cell; presumably it arrives through this star import -- confirm.
from generate_frag_dag import *
import score_frag_dag
# The metatlas site-packages folder is inserted at the front of sys.path before
# the metatlas imports that follow.
sys.path.insert(0,'/global/project/projectdirs/metatlas/anaconda/lib/python2.7/site-packages' )
from metatlas import metatlas_objects as metob
from metatlas import h5_query as h5q
from metatlas import mzml_to_hdf
import h5py
import tables
import pickle
# RDKit chemistry toolkit: molecule parsing, descriptors and drawing helpers.
from rdkit import Chem
# from rdkit.Chem.rdMolDescriptors import ExactMolWt
from rdkit.Chem import Descriptors
from rdkit.Chem import rdMolDescriptors
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from rdkit.Chem import rdDepictor
from rdkit.Chem.Draw import rdMolDraw2D
from copy import deepcopy
from pyteomics import mgf
from rdkit.Chem.Draw import IPythonConsole
from IPython.display import SVG,display
In [2]:
# Load every InChI string from the text tables (one InChI per line), remove
# duplicates, and build a lookup list of
# (inchi, inchi-key, rdkit-mol, rdkit-mol with explicit Hs).
datafile = [
    'inchi_tables/enzo_and_img_abc_inchis.txt',
    'inchi_tables/gnps_molecule_inchis.txt',
    'inchi_tables/neutral_organic_compounds_from_metacyc.txt',
]

inchi = []
for d in datafile:
    with open(d) as fid:
        for line in fid:
            stripped = line.strip()
            # Skip blank lines here instead of assuming that the first sorted
            # entry is the empty string and slicing it off afterwards; that
            # assumption would drop a real InChI when no blank line exists.
            if stripped:
                inchi.append(stripped)
# np.unique returns the sorted, de-duplicated entries.
inchi = np.unique(inchi)

# This list contains the inchi, inchi-key, rdkit-mol, and rdkit-mol with Hs
# TODO: store metatlas compound ID
inchi_with_key = []
for chi in inchi:
    myMol = Chem.MolFromInchi(chi)
    if myMol is None:
        # RDKit could not parse this InChI; report it and move on rather than
        # failing inside Chem.AddHs.
        print('Skipping InChI that RDKit could not parse: %s' % chi)
        continue
    inchi_with_key.append(
        (chi, Chem.InchiToInchiKey(chi), myMol, Chem.AddHs(myMol)))

print(len(inchi_with_key))
# Show the first entry as a sanity check (only when there is one).
if inchi_with_key:
    print(inchi_with_key[0])
In [3]:
# Store the InChI string as an 'inchi' attribute on every Pactolus tree file.
# The first key inside each HDF5 file is compared against the InChIKeys in
# inchi_with_key; files without a match are printed so they can be inspected.
path_to_trees = '/project/projectdirs/openmsi/projects/pactolus_trees_with_inchi/'
all_my_h5_files = glob.glob(os.path.join(path_to_trees, '*_hdf5_5_*.h5'))

# Build the InChIKey -> InChI lookup once instead of scanning the whole list
# for every file. setdefault keeps the first InChI seen for a key, which is
# the entry the original list scan would have selected.
inchi_by_key = {}
for entry in inchi_with_key:
    inchi_by_key.setdefault(entry[1], entry[0])

for myFile in all_my_h5_files:
    # The context manager closes the file even if something below raises.
    with h5py.File(myFile, 'r+') as f:
        tree_keys = list(f.keys())
        # A file with no keys, or a key that is not in the table, is reported
        # below instead of raising IndexError.
        match = inchi_by_key.get(tree_keys[0]) if tree_keys else None
        if match:
            f.attrs['inchi'] = match
        else:
            print(myFile)
In [50]:
%%javascript
var nb = IPython.notebook;
var kernel = IPython.notebook.kernel;
var command = "NOTEBOOK_FULL_PATH = '" + nb.base_url + nb.notebook_path + "'";
kernel.execute(command);
In [51]:
# Publish a copy of this notebook to the openmsi web folder and print the
# nbviewer link for it. NOTEBOOK_FULL_PATH is set by the javascript cell above.
import shutil

filename = os.path.basename(NOTEBOOK_FULL_PATH)
www_dir = '/project/projectdirs/openmsi/www'
temp = '%s/%s' % (www_dir, filename)
# Copy and chmod through the standard library rather than an unquoted shell
# command, so notebook names containing spaces work and failures raise
# instead of passing silently.
shutil.copy(filename, temp)
os.chmod(temp, 0o775)
print('http://nbviewer.ipython.org/url/portal.nersc.gov/project/openmsi/%s?flush_cache=true' % filename)
In [ ]: