notebook.community

Edit and run



In [236]:

    
import warnings

# Our numerical workhorses
import numpy as np
import pandas as pd
import scipy.stats as st

# Import pyplot for plotting
import matplotlib.pyplot as plt
import mpl_toolkits.mplot3d

# Seaborn, useful for graphics
import seaborn as sns

# Magic function to make matplotlib inline; other style specs must come AFTER
%matplotlib inline

# This enables SVG graphics inline.  There is a bug, so uncomment if it works.
%config InlineBackend.figure_formats = {'png', 'retina'}

# JB's favorite Seaborn settings for notebooks
rc = {'lines.linewidth': 2, 
      'axes.labelsize': 18, 
      'axes.titlesize': 18, 
      'axes.facecolor': 'DFDFE5'}
sns.set_context('notebook', rc=rc)
sns.set_style('darkgrid', rc=rc)

# Import Bokeh modules for interactive plotting
import bokeh.charts
import bokeh.charts.utils
import bokeh.io
import bokeh.models
import bokeh.palettes
import bokeh.plotting

# Display graphics in this notebook
bokeh.io.output_notebook()

# Suppress future warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

from sklearn.utils.extmath import cartesian

import networkx as nx









    




    


    

    
        
        BokehJS successfully loaded.



In [165]:

    
fname= '../input/c_elegans_gene_interactions.txt'
maxL= 0
i= 0
with open(fname, 'r') as f:
    for line in f:
        if line[0] != '#':
            l= line.split('\t') 
            if len(l) > maxL:
                maxL= len(l)

print(maxL) #as of Jan 1, 2016, maxL was 44 -- this is the number of rows in the df



In [166]:

    
maxL= 44
fname= '../input/c_elegans_gene_interactions.txt'
series= []
with open(fname, 'r') as f:
    for line in f:
        if line[0] != '#':
            line= line.rstrip()
            l= line.split('\t') 
            empty= [np.nan]*(maxL - len(l))
            l= l+empty
            series.append(l)
df= pd.DataFrame(data= series)



In [198]:

    
# WBInteractionID	Interaction_type	Interaction_subtype	Summary	Citation	Interactor1	Common-name	Role1	Interactor2	Common-name	Role2	...
columns_main= ['WBInteractionID', 'interaction_type', 'interaction_subtype', 'summary', 'citation']
column_repeat= ['interactor','common_name','role']



In [222]:

    
columns_sub= []
columns_sub_no_name= []

#max number of interactors in this df
maxInteractors= (maxL - len(columns_main))/len(column_repeat)


for i in np.arange(1, maxInteractors+1):
    for word in column_repeat:
        word= word+str(int(i))
        columns_sub= columns_sub+[word]
        if word != 'common_name'+str(int(i)):
            columns_sub_no_name= columns_sub_no_name+[word]

columns= columns_main+columns_sub



In [232]:

    
#try to clean up the 
maxL= 44
fname= '../input/c_elegans_gene_interactions.txt'
series= []

with open(fname, 'r') as f:
    for line in f:
        c= []
        
        if line[0] != '#':  
            
            line= line.rstrip()
            l= line.split('\t') 
            keys= columns[0:len(l)]
            d= dict(zip(keys, l))
            
            if d['interaction_type'] == 'Regulatory':
                
            
                eff_indices= np.where(np.array([x == 'Trans_regulator' for x in l]))[0]
                aff_indices= np.where(np.array([x == 'Trans_regulated' for x in l]))[0]
                
                d= dict(zip(keys, l))
                interactors= (len(l) - len(columns_main))/3

                #if no multiple interactors, assume it's autoregulatory
                #only accept trans_regulator roles:
                if interactors == 1 and d['role1'] == 'Trans_regulator':
                    
                    
                    d['role1']= 'Trans_regulated' #to denote auto-regulation
                    mykeys= ['interactor1','common_name1','role1']
                    add= [d[x] for x in mykeys]
                    
                    c= l+add      
                    series.append(c)
                    
                else:
                    
                    
                    combos= cartesian([eff_indices, aff_indices])
                    #print(aff_indices)
                    for pair in combos:
                        eff_ind= pair[0]
                        aff_ind= pair[1]
                        
                        #info on the effector:
                        effector= l[eff_ind-2:eff_ind+1]
                        affector= l[aff_ind-2: aff_ind+1]
                        
                        
                        c= l[:len(columns_main)] + effector + affector
                        series.append(c)

cols= columns_main+ ['interactor1','common_name1','role1']+['interactor2','common_name2','role2']
df= pd.DataFrame(data= series, columns= cols)



In [233]:

    
df= df.drop(['interaction_type', 'interaction_subtype', 'summary', 'citation'],1)
df.head()









    Out[233]:






  
    
      
      WBInteractionID
      interactor1
      common_name1
      role1
      interactor2
      common_name2
      role2
    
  
  
    
      0
      WBInteraction000003067
      WBGene00003003
      lin-14
      Trans_regulator
      WBGene00001080
      dpy-21
      Trans_regulated
    
    
      1
      WBInteraction000003068
      WBGene00001085
      dpy-26
      Trans_regulator
      WBGene00003003
      lin-14
      Trans_regulated
    
    
      2
      WBInteraction000003069
      WBGene00001086
      dpy-27
      Trans_regulator
      WBGene00003003
      lin-14
      Trans_regulated
    
    
      3
      WBInteraction000003070
      WBGene00001087
      dpy-28
      Trans_regulator
      WBGene00003003
      lin-14
      Trans_regulated
    
    
      4
      WBInteraction000003071
      WBGene00006962
      xol-1
      Trans_regulator
      WBGene00004745
      sdc-1
      Trans_regulated



In [234]:

    
df.shape









    Out[234]:





(7249, 7)



In [287]:

    
g= nx.from_pandas_dataframe(df, 'interactor1', 'interactor2', nx.DiGraph())



In [249]:

    
nx.draw(g)



In [296]:

    
#degreeness of the network
for key, value in nx.degree(g).items():
    if np.size(val)== 0:
        val= np.array(value)
    else:
        val= np.append(val, value)
    if value == 0:
        print('uhoh')
        
print(val)









    



[6 3 1 ..., 3 2 2]



In [299]:

    
n_bins= np.floor(np.cbrt(len(val))) #rule of thumb, but use cbrt since we are using log-bins
MIN= min(val)
MAX= max(val)

#plot histogram with log spaced bins
plt.hist(val, bins = 10 ** np.linspace(np.log10(MIN), np.log10(MAX), n_bins))
plt.gca().set_xscale('log')
plt.gca().set_yscale('log')
plt.gca().set_xlabel('Node Degree')
plt.gca().set_ylabel('Frequency')
plt.gca().set_title('Log-Log Histogram of Node Degree Frequency')









    Out[299]:





<matplotlib.text.Text at 0x11af205f8>



In [298]:

    
conn_comp= nx.connected_component_subgraphs(g)



In [277]:

    
print(conn_comp)









    



<generator object connected_component_subgraphs at 0x116a5a2b0>



In [281]:

    
print(nx.degree_histogram(g))









    



[0, 1377, 526, 224, 176, 117, 67, 68, 53, 39, 29, 24, 27, 28, 14, 21, 12, 5, 5, 7, 4, 9, 5, 3, 7, 2, 4, 3, 2, 2, 4, 1, 0, 2, 0, 0, 3, 1, 0, 0, 0, 1, 0, 0, 3, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]



In [282]:

    
plt.plot(nx.degree_histogram(g))









    Out[282]:





[<matplotlib.lines.Line2D at 0x1197ab358>]



In [300]:

    
nx.draw_graphviz(g)









    



---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-300-7c0c772b1735> in <module>()
----> 1 nx.draw_graphviz(g)

/Users/davidangeles/anaconda/envs/py35/lib/python3.5/site-packages/networkx/drawing/nx_pylab.py in draw_graphviz(G, prog, **kwargs)
    980        See networkx.draw_networkx() for a description of optional keywords.
    981     """
--> 982     pos = nx.drawing.graphviz_layout(G, prog)
    983     draw(G, pos, **kwargs)
    984 

/Users/davidangeles/anaconda/envs/py35/lib/python3.5/site-packages/networkx/drawing/nx_pydot.py in graphviz_layout(G, prog, root, **kwds)
    255     This is a wrapper for pydot_layout.
    256     """
--> 257     return pydot_layout(G=G,prog=prog,root=root,**kwds)
    258 
    259 

/Users/davidangeles/anaconda/envs/py35/lib/python3.5/site-packages/networkx/drawing/nx_pydot.py in pydot_layout(G, prog, root, **kwds)
    269     >>> pos=nx.pydot_layout(G,prog='dot')
    270     """
--> 271     pydot = load_pydot()
    272 
    273     P=to_pydot(G)

/Users/davidangeles/anaconda/envs/py35/lib/python3.5/site-packages/networkx/drawing/nx_pydot.py in load_pydot()
     45     else:
     46         msg = "pydot could not be loaded: http://code.google.com/p/pydot/"
---> 47         raise ImportError(msg)
     48 
     49     return module

ImportError: pydot could not be loaded: http://code.google.com/p/pydot/



In [303]:

    
import graphviz









    



---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-303-3813d87ba683> in <module>()
----> 1 import graphviz

ImportError: No module named 'graphviz'



In [ ]:

	WBInteractionID	interactor1	common_name1	role1	interactor2	common_name2	role2
0	WBInteraction000003067	WBGene00003003	lin-14	Trans_regulator	WBGene00001080	dpy-21	Trans_regulated
1	WBInteraction000003068	WBGene00001085	dpy-26	Trans_regulator	WBGene00003003	lin-14	Trans_regulated
2	WBInteraction000003069	WBGene00001086	dpy-27	Trans_regulator	WBGene00003003	lin-14	Trans_regulated
3	WBInteraction000003070	WBGene00001087	dpy-28	Trans_regulator	WBGene00003003	lin-14	Trans_regulated
4	WBInteraction000003071	WBGene00006962	xol-1	Trans_regulator	WBGene00004745	sdc-1	Trans_regulated