Visualise Highbinders


In [ ]:
# Clear all user-defined names from the interactive namespace before running
# the notebook; -f skips the confirmation prompt.
%reset -f

import all required dependencies


In [ ]:
# standard imports
import urllib2
import os
import sys
import json
import StringIO
import pickle

# dataframe and numerical
import pandas as pd
import numpy as np

# plotting
import matplotlib.pyplot as plt
%matplotlib inline
# plotting params
from matplotlib import rcParams
rcParams["figure.figsize"] = 10, 8

#scipy
from scipy import stats
from scipy.special import erf
from scipy import sqrt

# glypy
import glypy
from glypy.plot import plot
from glypy.io import glycoct

# other modules
sys.path.append('../scripts/')
import post_glycan_convert as pgc

In [ ]:
## Paths used throughout this notebook.

# Directory that holds the results, and the pickle of dataframes inside it.
results_dir = "../results/galectin-3/"
dataframe_out = results_dir + "dataframes_galectin.pkl"
dataframefile = dataframe_out  # alias read by the data-loading cell

# Path to the input samples JSON file.
samples_in = "../data/galectin-3/galectin-3_5.0_human.json"

In [ ]:
# Check whether or not the dataframes exist

subdir="./"
dataframefile=dataframe_out

if not os.path.isfile(dataframefile):
    print "calling the notebook that loads the data"
    %run download_cfg_for_galectin.ipynb
with open(os.path.join(subdir, dataframefile)) as f:
    dataframes = pickle.load(f)
    
    
dataframes[0]["sample"]
frame=dataframes[0]["dataframe"]
frame.head()

In [ ]:
# create a data frame with the glycan and the spacer as separate columns

Structure = "Structure on Masterlist"

df = pd.concat([frame["Chart Number"], frame[Structure]], axis=1)

# Glycan: the greedy pattern captures everything up to and including the last
# '-'; strip('-') then removes leading and trailing dashes.
# expand=False makes str.extract return a Series for the single capture group
# (the default returns a DataFrame on current pandas, which has no .str).
df["Glycan"] = df[Structure].str.extract('(.*-)', expand=False).str.strip('-')

# Spacer: the piece that follows the '.*-' match (element 1 of the split).
df["Spacer"] = df[Structure].str.split('.*-').str[1]
df

In [ ]:
# create a function that plots using glypy


def plotter(func):
    """
    Decorator that plots the value returned by ``func``.

    The wrapped function is called with its original arguments; its result is
    passed to ``plot(res, label=True)`` and then returned unchanged.

    :param func: callable whose return value ``plot`` can draw.
    :return: wrapper that accepts the same arguments as ``func``.
    """
    # Imported locally so this cell does not depend on the import cell.
    import functools

    # functools.wraps keeps func's __name__ and __doc__ on the wrapper.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        res = func(*args, **kwargs)
        plot(res, label=True)
        return res
    return wrapper

@plotter
def get_gly_iupac(iupacstring):
    """
    Convert ``iupacstring`` into a structure parsed by ``glycoct.loads``.

    The text goes through two ``pgc.mechanise_glycan_convert`` passes, first
    with target "Kcf" and then with target "Glycoct"; the final text is handed
    to ``glycoct.loads`` and the parsed result is returned. Because of the
    ``@plotter`` decorator the result is also plotted.

    NOTE(review): presumably ``iupacstring`` is an IUPAC glycan string --
    confirm against ``post_glycan_convert``.
    """
    text = iupacstring
    # Each pass wraps the current text in a file-like handle for the converter.
    for target_format in ("Kcf", "Glycoct"):
        handle = StringIO.StringIO(text)
        text = pgc.mechanise_glycan_convert(handle, target_format, "text")
    return glycoct.loads(text)

In [ ]:
# Numbers of the high binders; the cells below match them against the
# "Chart Number" column of df.
high_binders_from_paper = [
    372, 543, 545, 547, 549, 550, 551, 565,
    566, 569, 576, 577, 578, 579, 580, 581,
    582, 583, 584, 585, 586, 587, 588, 589,
]

In [ ]:
# show all the outputs
# With ast_node_interactivity set to "all", IPython displays the value of every
# top-level expression statement in a cell instead of only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [ ]:
# show high binder glycan rows

# An expression inside a loop body is never auto-displayed by the notebook,
# so each row is passed to display() explicitly.
from IPython.display import display

# Raise the display limits so wide tables and long strings are not truncated.
pd.set_option('display.max_columns', 1000)
pd.set_option('display.max_colwidth', 1000)


for i in high_binders_from_paper:
    display(df[df["Chart Number"] == i].head())

In [ ]:
# Keep only the rows whose "Chart Number" is in high_binders_from_paper.
is_high_binder = df["Chart Number"].isin(high_binders_from_paper)
highbinders_df = df[is_high_binder]

In [ ]:
# Display the subset of df selected for the high binders.
highbinders_df

In [ ]:
df[df["Chart Number"]==372]
%matplotlib inline
get_gly_iupac(df["Glycan"][372])

In [ ]:


In [ ]:
# Print the first four entries; the call form works on Python 2 and Python 3.
print(high_binders_from_paper[0:4])

In [ ]:
# plot glycan images
for i in high_binders_from_paper[0:4]:
    try:
        # Select by "Chart Number" value rather than by index label.
        glycan = df.loc[df["Chart Number"] == i, "Glycan"].iloc[0]
        get_gly_iupac(glycan)
    except Exception as err:
        # Best effort: keep plotting the remaining glycans, but report the
        # failure instead of hiding it (a bare except would also swallow
        # KeyboardInterrupt).
        print("could not plot glycan for chart number %s: %s" % (i, err))

In [ ]:


In [ ]: