In [236]:
import warnings

# Our numerical workhorses
import numpy as np
import pandas as pd
import scipy.stats as st

# Import pyplot for plotting
import matplotlib.pyplot as plt
import mpl_toolkits.mplot3d

# Seaborn, useful for graphics
import seaborn as sns

# Magic function to make matplotlib inline; other style specs must come AFTER
%matplotlib inline

# This enables SVG graphics inline.  There is a bug, so uncomment if it works.
%config InlineBackend.figure_formats = {'png', 'retina'}

# JB's favorite Seaborn settings for notebooks
rc = {'lines.linewidth': 2, 
      'axes.labelsize': 18, 
      'axes.titlesize': 18, 
      'axes.facecolor': 'DFDFE5'}
sns.set_context('notebook', rc=rc)
sns.set_style('darkgrid', rc=rc)

# Import Bokeh modules for interactive plotting
import bokeh.charts
import bokeh.charts.utils
import bokeh.io
import bokeh.models
import bokeh.palettes
import bokeh.plotting

# Display graphics in this notebook
bokeh.io.output_notebook()

# Suppress future warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

from sklearn.utils.extmath import cartesian

import networkx as nx


BokehJS successfully loaded.

In [165]:
fname= '../input/c_elegans_gene_interactions.txt'
maxL= 0
i= 0
with open(fname, 'r') as f:
    for line in f:
        if line[0] != '#':
            l= line.split('\t') 
            if len(l) > maxL:
                maxL= len(l)

print(maxL) #as of Jan 1, 2016, maxL was 44 -- this is the number of rows in the df


44

In [166]:
maxL= 44
fname= '../input/c_elegans_gene_interactions.txt'
series= []
with open(fname, 'r') as f:
    for line in f:
        if line[0] != '#':
            line= line.rstrip()
            l= line.split('\t') 
            empty= [np.nan]*(maxL - len(l))
            l= l+empty
            series.append(l)
df= pd.DataFrame(data= series)

In [198]:
# WBInteractionID	Interaction_type	Interaction_subtype	Summary	Citation	Interactor1	Common-name	Role1	Interactor2	Common-name	Role2	...
columns_main= ['WBInteractionID', 'interaction_type', 'interaction_subtype', 'summary', 'citation']
column_repeat= ['interactor','common_name','role']

In [222]:
columns_sub= []
columns_sub_no_name= []

#max number of interactors in this df
maxInteractors= (maxL - len(columns_main))/len(column_repeat)


for i in np.arange(1, maxInteractors+1):
    for word in column_repeat:
        word= word+str(int(i))
        columns_sub= columns_sub+[word]
        if word != 'common_name'+str(int(i)):
            columns_sub_no_name= columns_sub_no_name+[word]

columns= columns_main+columns_sub

In [232]:
#try to clean up the 
maxL= 44
fname= '../input/c_elegans_gene_interactions.txt'
series= []

with open(fname, 'r') as f:
    for line in f:
        c= []
        
        if line[0] != '#':  
            
            line= line.rstrip()
            l= line.split('\t') 
            keys= columns[0:len(l)]
            d= dict(zip(keys, l))
            
            if d['interaction_type'] == 'Regulatory':
                
            
                eff_indices= np.where(np.array([x == 'Trans_regulator' for x in l]))[0]
                aff_indices= np.where(np.array([x == 'Trans_regulated' for x in l]))[0]
                
                d= dict(zip(keys, l))
                interactors= (len(l) - len(columns_main))/3

                #if no multiple interactors, assume it's autoregulatory
                #only accept trans_regulator roles:
                if interactors == 1 and d['role1'] == 'Trans_regulator':
                    
                    
                    d['role1']= 'Trans_regulated' #to denote auto-regulation
                    mykeys= ['interactor1','common_name1','role1']
                    add= [d[x] for x in mykeys]
                    
                    c= l+add      
                    series.append(c)
                    
                else:
                    
                    
                    combos= cartesian([eff_indices, aff_indices])
                    #print(aff_indices)
                    for pair in combos:
                        eff_ind= pair[0]
                        aff_ind= pair[1]
                        
                        #info on the effector:
                        effector= l[eff_ind-2:eff_ind+1]
                        affector= l[aff_ind-2: aff_ind+1]
                        
                        
                        c= l[:len(columns_main)] + effector + affector
                        series.append(c)

cols= columns_main+ ['interactor1','common_name1','role1']+['interactor2','common_name2','role2']
df= pd.DataFrame(data= series, columns= cols)

In [233]:
df= df.drop(['interaction_type', 'interaction_subtype', 'summary', 'citation'],1)
df.head()


Out[233]:
WBInteractionID interactor1 common_name1 role1 interactor2 common_name2 role2
0 WBInteraction000003067 WBGene00003003 lin-14 Trans_regulator WBGene00001080 dpy-21 Trans_regulated
1 WBInteraction000003068 WBGene00001085 dpy-26 Trans_regulator WBGene00003003 lin-14 Trans_regulated
2 WBInteraction000003069 WBGene00001086 dpy-27 Trans_regulator WBGene00003003 lin-14 Trans_regulated
3 WBInteraction000003070 WBGene00001087 dpy-28 Trans_regulator WBGene00003003 lin-14 Trans_regulated
4 WBInteraction000003071 WBGene00006962 xol-1 Trans_regulator WBGene00004745 sdc-1 Trans_regulated

In [234]:
df.shape


Out[234]:
(7249, 7)

In [287]:
g= nx.from_pandas_dataframe(df, 'interactor1', 'interactor2', nx.DiGraph())

In [249]:
nx.draw(g)



In [296]:
#degreeness of the network
for key, value in nx.degree(g).items():
    if np.size(val)== 0:
        val= np.array(value)
    else:
        val= np.append(val, value)
    if value == 0:
        print('uhoh')
        
print(val)


[6 3 1 ..., 3 2 2]

In [299]:
n_bins= np.floor(np.cbrt(len(val))) #rule of thumb, but use cbrt since we are using log-bins
MIN= min(val)
MAX= max(val)

#plot histogram with log spaced bins
plt.hist(val, bins = 10 ** np.linspace(np.log10(MIN), np.log10(MAX), n_bins))
plt.gca().set_xscale('log')
plt.gca().set_yscale('log')
plt.gca().set_xlabel('Node Degree')
plt.gca().set_ylabel('Frequency')
plt.gca().set_title('Log-Log Histogram of Node Degree Frequency')


Out[299]:
<matplotlib.text.Text at 0x11af205f8>

In [298]:
conn_comp= nx.connected_component_subgraphs(g)

In [277]:
print(conn_comp)


<generator object connected_component_subgraphs at 0x116a5a2b0>

In [281]:
print(nx.degree_histogram(g))


[0, 1377, 526, 224, 176, 117, 67, 68, 53, 39, 29, 24, 27, 28, 14, 21, 12, 5, 5, 7, 4, 9, 5, 3, 7, 2, 4, 3, 2, 2, 4, 1, 0, 2, 0, 0, 3, 1, 0, 0, 0, 1, 0, 0, 3, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]

In [282]:
plt.plot(nx.degree_histogram(g))


Out[282]:
[<matplotlib.lines.Line2D at 0x1197ab358>]

In [300]:
nx.draw_graphviz(g)


---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-300-7c0c772b1735> in <module>()
----> 1 nx.draw_graphviz(g)

/Users/davidangeles/anaconda/envs/py35/lib/python3.5/site-packages/networkx/drawing/nx_pylab.py in draw_graphviz(G, prog, **kwargs)
    980        See networkx.draw_networkx() for a description of optional keywords.
    981     """
--> 982     pos = nx.drawing.graphviz_layout(G, prog)
    983     draw(G, pos, **kwargs)
    984 

/Users/davidangeles/anaconda/envs/py35/lib/python3.5/site-packages/networkx/drawing/nx_pydot.py in graphviz_layout(G, prog, root, **kwds)
    255     This is a wrapper for pydot_layout.
    256     """
--> 257     return pydot_layout(G=G,prog=prog,root=root,**kwds)
    258 
    259 

/Users/davidangeles/anaconda/envs/py35/lib/python3.5/site-packages/networkx/drawing/nx_pydot.py in pydot_layout(G, prog, root, **kwds)
    269     >>> pos=nx.pydot_layout(G,prog='dot')
    270     """
--> 271     pydot = load_pydot()
    272 
    273     P=to_pydot(G)

/Users/davidangeles/anaconda/envs/py35/lib/python3.5/site-packages/networkx/drawing/nx_pydot.py in load_pydot()
     45     else:
     46         msg = "pydot could not be loaded: http://code.google.com/p/pydot/"
---> 47         raise ImportError(msg)
     48 
     49     return module

ImportError: pydot could not be loaded: http://code.google.com/p/pydot/

In [303]:
import graphviz


---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-303-3813d87ba683> in <module>()
----> 1 import graphviz

ImportError: No module named 'graphviz'

In [ ]: