In [ ]:
%reset -f
In [ ]:
# standard imports
import urllib2
import os
import sys
import json
import StringIO
import pickle
# dataframe and numerical
import pandas as pd
import numpy as np
# plotting
import matplotlib.pyplot as plt
%matplotlib inline
# plotting params
from matplotlib import rcParams
rcParams["figure.figsize"] = 10, 8
#scipy
from scipy import stats
from scipy.special import erf
from scipy import sqrt
# glypy
import glypy
from glypy.plot import plot
from glypy.io import glycoct
# other modules
sys.path.append('../scripts/')
import post_glycan_convert as pgc
In [ ]:
# Project-specific file locations (all relative paths).
results_dir = "../results/galectin-3/"
samples_in = "../data/galectin-3/galectin-3_5.0_human.json"
# results_dir already ends with "/", so plain concatenation gives the full
# path of the pickle file; both names refer to that same path.
dataframefile = dataframe_out = results_dir + "dataframes_galectin.pkl"
In [ ]:
# Check whether or not the dataframes exist
subdir="./"
dataframefile=dataframe_out
if not os.path.isfile(dataframefile):
print "calling the notebook that loads the data"
%run download_cfg_for_galectin.ipynb
with open(os.path.join(subdir, dataframefile)) as f:
dataframes = pickle.load(f)
dataframes[0]["sample"]
frame=dataframes[0]["dataframe"]
frame.head()
In [ ]:
# create a data frame with the glycan and the spacer as separate columns
Structure = "Structure on Masterlist"
df = pd.concat([frame["Chart Number"], frame[Structure]], axis=1)
df.head()
# Greedy match: everything up to and including the LAST "-" of the name.
# expand=False makes extract() return a Series for this single group, so the
# .str accessor below works regardless of the pandas default for `expand`.
glycan_with_dash = df[Structure].str.extract(r'(.*-)', expand=False)
glycan_with_dash
# Drop the dash(es) at either end of the extracted text.
df["Glycan"] = glycan_with_dash.str.strip('-')
# Splitting on the same greedy pattern leaves an empty first piece and the
# text after the last "-" as the second piece, which is stored as the spacer.
df["Spacer"] = df[Structure].str.split('.*-').str[1]
df
In [ ]:
# create a function that plots using glypy
def plotter(func):
    """Decorator that draws the return value of the wrapped function.

    The wrapped function is called with exactly the arguments it was given;
    its result is passed to ``plot(result, label=True)`` and then returned
    to the caller unchanged.

    Parameters
    ----------
    func : callable
        Function whose return value can be handed to ``plot``.

    Returns
    -------
    callable
        A wrapper with the same call signature, name and docstring as
        ``func``.
    """
    # Imported here so the decorator does not depend on another cell.
    import functools

    # functools.wraps keeps func's __name__ and __doc__ on the wrapper, so
    # decorated functions still identify themselves correctly.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        res = func(*args, **kwargs)
        plot(res, label=True)
        return res
    return wrapper
@plotter
def get_gly_iupac(iupacstring):
    """Turn a glycan string into the object produced by ``glycoct.loads``.

    The input string is run through ``pgc.mechanise_glycan_convert`` twice:
    first requesting "Kcf" output, then feeding that result back in and
    requesting "Glycoct" output.  The final text is parsed with
    ``glycoct.loads`` and returned.  Because of the ``@plotter`` decorator
    the returned object is also plotted on every call.
    """
    kcf_text = pgc.mechanise_glycan_convert(
        StringIO.StringIO(iupacstring), "Kcf", "text")
    glycoct_text = pgc.mechanise_glycan_convert(
        StringIO.StringIO(kcf_text), "Glycoct", "text")
    return glycoct.loads(glycoct_text)
In [ ]:
# Chart numbers of the glycans listed as high binders (values from the paper).
high_binders_from_paper = [
    372, 543, 545, 547, 549, 550, 551, 565, 566, 569,
    576, 577, 578, 579, 580, 581, 582, 583, 584, 585,
    586, 587, 588, 589,
]
In [ ]:
# show all the outputs
# Setting ast_node_interactivity to "all" on the InteractiveShell class asks
# IPython to display the value of every expression statement in a cell,
# instead of only the last one.  The setting is made on the class, so it
# stays in effect for the cells that run after this one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
In [ ]:
# show high binder glycan rows
# Raise the pandas display limits so wide tables and long cell contents are
# printed in full.
pd.set_option('display.max_colwidth', 1000)
pd.set_option('display.max_columns', 1000)
for i in high_binders_from_paper:
    # Bare expression on purpose.
    # NOTE(review): it is only rendered when InteractiveShell.ast_node_interactivity
    # is "all" (set in an earlier cell) -- confirm before changing that setting.
    df.loc[df["Chart Number"] == i].head()
In [ ]:
# Keep only the rows whose chart number appears in the high-binder list.
is_high_binder = df["Chart Number"].isin(high_binders_from_paper)
highbinders_df = df[is_high_binder]
In [ ]:
# Display the high-binder subset selected from df in the previous cell.
highbinders_df
In [ ]:
df[df["Chart Number"]==372]
%matplotlib inline
get_gly_iupac(df["Glycan"][372])
In [ ]:
In [ ]:
# Preview the first four high-binder chart numbers.
print(high_binders_from_paper[:4])
In [ ]:
# plot glycan images
for i in high_binders_from_paper[0:4]:
    try:
        # Look the glycan up through the "Chart Number" column; indexing
        # df["Glycan"][i] would use the index label instead, which matches
        # the chart number only if the two happen to coincide.
        glycan = df.loc[df["Chart Number"] == i, "Glycan"].iloc[0]
        get_gly_iupac(glycan)
    except Exception as err:
        # Best effort: one glycan that cannot be found, converted or drawn
        # must not stop the others, but the failure is reported rather than
        # hidden.  Catching Exception (not a bare except) also keeps
        # KeyboardInterrupt working.
        print("could not plot chart number %s: %r" % (i, err))
In [ ]:
In [ ]: