by Keiichiro Ono - University of California, San Diego Trey Ideker Lab
In [1]:
import io
import json

import networkx as nx
import pandas as pd
import requests

import py2cytoscape.cytoscapejs as renderer
import py2cytoscape.util.cytoscapejs as cyjs
from py2cytoscape.data.cynetwork import CyNetwork
from py2cytoscape.data.cyrest_client import CyRestClient
from py2cytoscape.data.style import StyleUtil
In [2]:
# !!!!!!!!!!!!!!!!! Step 0: Start Cytoscape 3 with cyREST App !!!!!!!!!!!!!!!!!!!!!!!!!!
# Quick-start, part 1: connect to Cytoscape, load one network and one data table.
# Step 1: Create py2cytoscape client
# (no arguments: the default connection settings are used)
cy = CyRestClient()
# Step 2: Load network from somewhere
# Here the source is a local file; the path is relative to this notebook.
yeast_net = cy.network.create_from('../tests/data/galFiltered.json')
# Step 3: Load table as pandas' DataFrame
# index_col=0 makes the first CSV column the DataFrame index.
table_data = pd.read_csv('sample_data_table.csv', index_col=0)
# Last expression of the cell, so the preview is rendered as the cell output.
table_data.head()
Out[2]:
In [3]:
# Quick-start, part 2: merge the table, lay the network out, style it and render it.

# Step 4: push the DataFrame into Cytoscape's node table, matching on the 'name' column
yeast_net.update_node_table(df=table_data, network_key_col='name')

# Step 5: run the force-directed layout on the network
cy.layout.apply(name='force-directed', network=yeast_net)

# Step 6: define the Visual Style in code
my_yeast_style = cy.style.create('GAL Style')

# Default visual property values, given as key-value pairs.
basic_settings = {
    'NODE_FILL_COLOR': '#6AACB8',
    'NODE_SIZE': 55,
    'NODE_BORDER_WIDTH': 0,
    'NODE_LABEL_COLOR': '#555555',
    'EDGE_WIDTH': 2,
    'EDGE_TRANSPARENCY': 100,
    'EDGE_STROKE_UNSELECTED_PAINT': '#333333',
    'NETWORK_BACKGROUND_PAINT': '#FFFFEA'
}
my_yeast_style.update_defaults(basic_settings)

# Node label comes straight from the 'label' column.
my_yeast_style.create_passthrough_mapping(column='label', vp='NODE_LABEL', col_type='String')

# Both helper mappings span the observed range of the 'Degree' column.
degrees = yeast_net.get_node_column('Degree')
degree_min = degrees.min()
degree_max = degrees.max()
color_gradient = StyleUtil.create_2_color_gradient(min=degree_min, max=degree_max, colors=('white', '#6AACB8'))
degree_to_size = StyleUtil.create_slope(min=degree_min, max=degree_max, values=(10, 100))

# Fill color, node size and label font size all follow 'Degree'.
for visual_property, mapping_points in (
        ('NODE_FILL_COLOR', color_gradient),
        ('NODE_SIZE', degree_to_size),
        ('NODE_LABEL_FONT_SIZE', degree_to_size)):
    my_yeast_style.create_continuous_mapping(
        column='Degree', vp=visual_property, col_type='Integer', points=mapping_points)

cy.style.apply(my_yeast_style, yeast_net)

# Step 7: (Optional) embed the result as an interactive Cytoscape.js widget
yeast_net_view = yeast_net.get_first_view()
style_for_widget = cy.style.get(my_yeast_style.get_name(), data_format='cytoscapejs')
renderer.render(
    yeast_net_view,
    style=style_for_widget['style'],
    background='radial-gradient(#FFFFFF 15%, #DDDDDD 105%)')
From version 0.4.0, py2cytoscape has wrapper modules for the cyREST RESTful API. This means you can access Cytoscape features in a more Pythonic way instead of calling the raw REST API via HTTP.
Since pandas is the standard library for data wrangling/analysis in Python, this new version uses its DataFrame as its basic data object.
You can use the Cytoscape.js widget to embed your final result as a part of your notebook.
cyREST provides a language-agnostic RESTful API, but you need to write a lot of boilerplate code to access the raw API. Here is an example. Both of the following cells do the same task: creating an empty network in Cytoscape. You will notice it is significantly simpler if you use the py2cytoscape wrapper API.
In [4]:
# Create an empty network by calling the raw cyREST API directly, for comparison
# with the py2cytoscape wrapper call in the next cell.
# `requests` (HTTP client) and `json` are imported in the first cell of the notebook.
# Basic Setup
PORT_NUMBER = 1234
BASE = 'http://localhost:' + str(PORT_NUMBER) + '/v1/'
# Header for posting data to the server as JSON
HEADERS = {'Content-Type': 'application/json'}
# Define dictionary of empty network
empty_network = {
    'data': {
        'name': "I'm empty!"
    },
    'elements': {
        'nodes': [],
        'edges': []
    }
}
res = requests.post(BASE + 'networks?collection=My%20Collection', data=json.dumps(empty_network), headers=HEADERS)
# Stop here with a clear HTTP error if the server rejected the request, instead of
# failing later on a missing 'networkSUID' key in an error payload.
res.raise_for_status()
new_network_id = res.json()['networkSUID']
print('New network created with raw REST API. Its SUID is ' + str(new_network_id))
In [5]:
# Same task through the py2cytoscape wrapper: one call creates the network in the
# given collection. `cy` is the CyRestClient instance created in an earlier cell.
network = cy.network.create(name='My Network', collection='My network collection')
# get_id() gives the SUID of the new network, which is printed below.
print('New network created with py2cytoscape. Its SUID is ' + str(network.get_id()))
In [6]:
# Create an instance of cyREST client. Default IP is 'localhost', and port number is 1234.
# cy = CyRestClient() - This default constructor creates connection to http://localhost:1234/v1
# Rebinding `cy` replaces the client object created at the top of the notebook.
cy = CyRestClient(ip='127.0.0.1', port=1234)
# Cleanup: Delete all existing networks and tables in current Cytoscape session
# NOTE(review): networks created by earlier cells (e.g. yeast_net) are deleted too, so
# their Python handles are presumably stale from here on - confirm before reusing them.
cy.session.delete()
In [7]:
# Three ways to create an empty network; each call returns a handle that is kept
# in its own variable.
# Empty network
empty1 = cy.network.create()
# With name
empty2 = cy.network.create(name='Created in Jupyter Notebook')
# With name and collection name
empty3 = cy.network.create(name='Also created in Jupyter', collection='New network collection')
In [8]:
# Load a single local file
# Every loaded network is bound to its own name so that no handle is overwritten.
net_from_local_json = cy.network.create_from('../tests/data/galFiltered.json')
net_from_local1 = cy.network.create_from('sample_yeast_network.xgmml', collection='My Collection')
net_from_local2 = cy.network.create_from('../tests/data/galFiltered.gml', collection='My Collection')
# Load from multiple locations
network_locations = [
    'sample_yeast_network.xgmml',  # Local file
    'http://chianti.ucsd.edu/cytoscape-data/galFiltered.sif',  # Static file on a web server
    'http://www.ebi.ac.uk/Tools/webservices/psicquic/intact/webservices/current/search/query/brca1?format=xml25'  # or a web service
]
# Passing a list of locations returns a Series
networks = cy.network.create_from(network_locations)
# Wrap it in a one-column DataFrame so the cell output renders as a table.
pd.DataFrame(networks, columns=['CyNetwork'])
Out[8]:
In [9]:
# Cytoscape.js JSON
# `cyjs` is py2cytoscape.util.cytoscapejs (imported in the first cell);
# get_empty_network() supplies the object passed as `data`.
n1 = cy.network.create(data=cyjs.get_empty_network(), name='Created from Cytoscape.js JSON')
In [10]:
# Pandas DataFrame
# Example 1: From a simple text table
# The column names are supplied explicitly via `names`, and fields are split on a
# single space (sep=' ').
df_from_sif = pd.read_csv('../tests/data/galFiltered.sif', names=['source', 'interaction', 'target'], sep=' ')
# Preview the first rows as the cell output.
df_from_sif.head()
Out[10]:
In [11]:
# By default, it uses 'source' for source node column, 'target' for target node column, and 'interaction' for interaction
# df_from_sif was built with exactly those column names, so no column arguments are needed.
yeast1 = cy.network.create_from_dataframe(df_from_sif, name='Yeast network created from pandas DataFrame')
# Example 2: from more complicated table
# Tab-separated file; the variable name suggests MITAB format - TODO confirm.
df_from_mitab = pd.read_csv('intact_pubid_22094256.txt', sep='\t')
df_from_mitab.head()
Out[11]:
In [12]:
# Choose the columns that define the edges: the first two columns of the table are
# used as source and target, and a named column carries the interaction.
source = df_from_mitab.columns[0]
target = df_from_mitab.columns[1]
interaction = 'Interaction identifier(s)'
# Network name shown in Cytoscape.
title='A Systematic Screen for CDK4/6 Substrates Links FOXM1 Phosphorylation to Senescence Suppression in Cancer Cells.'
# Non-default column names must be passed explicitly.
human1 = cy.network.create_from_dataframe(df_from_mitab, source_col=source, target_col=target, interaction_col=interaction, name=title)
# Import edge attributes and node attributes at the same time (TBD)
In [13]:
# NetworkX
# Generate a 100-node scale-free graph and send it to Cytoscape with two node attributes.
nx_graph = nx.scale_free_graph(100)
# Keyword arguments are used because the positional order of `name` and `values`
# differs between NetworkX 1.x and 2.x. dict() is applied because nx.degree() returns
# a view rather than a dict in NetworkX 2.x, and a non-dict `values` would be assigned
# as one single value to every node.
nx.set_node_attributes(nx_graph, name='Degree', values=dict(nx.degree(nx_graph)))
nx.set_node_attributes(nx_graph, name='Betweenness_Centrality', values=nx.betweenness_centrality(nx_graph))
scale_free100 = cy.network.create_from_networkx(nx_graph, collection='Generated by NetworkX')
# TODO: igraph
# TODO: Numpy adj. matrix
# TODO: GraphX
In [14]:
# As Cytoscape.js (dict)
# Export the network created from the SIF DataFrame.
yeast1_json = yeast1.to_json()
# Uncomment to inspect the exported structure (the output can be long):
# print(json.dumps(yeast1_json, indent=4))
In [15]:
# As NetworkX graph object
# Convert the Cytoscape network back to NetworkX and report its size.
sf100 = scale_free100.to_networkx()
num_nodes = sf100.number_of_nodes()
num_edges = sf100.number_of_edges()
print('Number of Nodes: ' + str(num_nodes))
print('Number of Edges: ' + str(num_edges))
In [16]:
# As a simple, SIF-like DataFrame
yeast1_df = yeast1.to_dataframe()
# Preview the first rows as the cell output.
yeast1_df.head()
Out[16]:
In [17]:
# Inspect a CyNetwork handle: its SUID, its name, its element counts and node values.
network_suid = yeast1.get_id()
print('This object references to Cytoscape network with SUID ' + str(network_suid) + '\n')
# Read one cell ('name') from the network table.
print('And its name is: ' + yeast1.get_network_value(column='name') + '\n')
# Both results support len() and indexing; the entries are used as node ids below.
nodes = yeast1.get_nodes()
edges = yeast1.get_edges()
print('* This network has ' + str(len(nodes)) + ' nodes and ' + str(len(edges)) + ' edges\n')
# Get a row in the node table as pandas Series object
node0 = nodes[0]
row = yeast1.get_node_value(id=node0)
print(row)
# Or, pick one cell in the table
cell = yeast1.get_node_value(id=node0, column='name')
print('\nThis node has name: ' + cell)
In [18]:
# Create a new CyNetwork object from existing network
# Passing `suid` wraps a network that already exists in Cytoscape.
network_ref1 = cy.network.create(suid=yeast1.get_id())
# And they are considered as same objects.
print(network_ref1 == yeast1)
# The new handle reads the same network table as the original one.
print(network_ref1.get_network_value(column='name'))
Cytoscape has two main data types: Network and Table. A Network is the graph topology, and Tables hold the properties of those graphs. For simplicity, this library gives access to three basic table objects: the node table, the edge table, and the network table.
For 99% of your use cases, you can use these three to store properties. Since pandas is extremely useful for handling table data, the default data type for tables is DataFrame. However, you can also use other data types including:
In [19]:
# Get table from Cytoscape
# One accessor per table kind: node, edge and network.
node_table = scale_free100.get_node_table()
edge_table = scale_free100.get_edge_table()
network_table = scale_free100.get_network_table()
# Preview the node table as the cell output.
node_table.head()
Out[19]:
In [20]:
# Show the network table transposed, so its columns are listed as rows.
network_table.T.head()
Out[20]:
In [21]:
# Fetch a single column of the node table. The variable is named after what it
# holds: the 'Degree' values of the scale-free network.
sf100_degrees = scale_free100.get_node_column('Degree')
print(sf100_degrees.head())
# Node Column information. "name" is the unique Index
scale_free100.get_node_columns()
Out[21]:
In [22]:
# New nodes are created from a plain list of names. The names must be unique.
new_node_names = ['a', 'b', 'c']
# add_nodes() gives back a name -> SUID lookup for the created nodes.
new_nodes = scale_free100.add_nodes(new_node_names)
# New edges are sent as a list of tuples:
# (source node SUID, target node SUID, interaction type)
new_edges = [
    (new_nodes[source_name], new_nodes[target_name], interaction_type)
    for source_name, target_name, interaction_type in (
        ('a', 'b', 'type1'),
        ('a', 'c', 'type2'),
        ('b', 'c', 'type3'),
    )
]
new_edge_ids = scale_free100.add_edges(new_edges)
# Display the result of add_edges() as the cell output.
new_edge_ids
Out[22]:
In [23]:
# Delete node
# new_nodes maps node name -> SUID, so this removes node 'a'.
scale_free100.delete_node(new_nodes['a'])
# Delete edge
# Uses the first index label of new_edge_ids as the edge id.
# NOTE(review): presumably the index holds edge SUIDs - confirm against add_edges().
scale_free100.delete_edge(new_edge_ids.index[0])
Let's do something a bit more realistic. You can update any Tables by using DataFrame objects.
Let's use the ID conversion web service by UniProt to add more information to the existing yeast network in the current session.
In [24]:
# Small utility function to convert ID sets
# `io`, `requests` and `pd` are imported in the first cell of the notebook.
def uniprot_id_mapping_service(query=None, from_id=None, to_id=None):
    """Convert a set of identifiers with the Uniprot ID Mapping web service.

    Args:
        query: Identifiers to convert, sent as the 'query' request parameter
            (callers in this notebook pass one space-separated string).
        from_id: Identifier type of the input, sent as 'from'.
        to_id: Identifier type wanted in the result, sent as 'to'.

    Returns:
        pandas.DataFrame parsed from the tab-separated response body.

    Raises:
        requests.HTTPError: If the service answers with an HTTP error status.
    """
    # Uniprot ID Mapping service
    # NOTE(review): confirm that this endpoint is still served by UniProt.
    url = 'http://www.uniprot.org/mapping/'
    payload = {
        'from': from_id,
        'to': to_id,
        'format': 'tab',
        'query': query
    }
    response = requests.get(url, params=payload)
    # Surface HTTP failures here rather than parsing an error page as a table.
    response.raise_for_status()
    # io.BytesIO accepts the raw response bytes on both Python 2 and 3 (the
    # Python 2-only StringIO module is not needed), and the context manager
    # closes the buffer even if parsing fails.
    with io.BytesIO(response.content) as buffer:
        return pd.read_csv(buffer, sep='\t')
In [25]:
# Build one table that maps each yeast node name to UniprotKB, SGD and Entrez IDs.

# Node table of the yeast network, fetched from Cytoscape
yeast_node_table = yeast1.get_node_table()

# KEGG ID -> UniprotKB ID (each node name is prefixed with 'sce:' for the query)
query1 = ' '.join('sce:' + gene_id for gene_id in yeast_node_table['name'])
id_map_kegg2uniprot = uniprot_id_mapping_service(query1, from_id='KEGG_ID', to_id='ID')
id_map_kegg2uniprot.columns = ['kegg', 'uniprot']

# UniprotKB -> SGD
query2 = ' '.join(id_map_kegg2uniprot['uniprot'])
id_map_uniprot2sgd = uniprot_id_mapping_service(query2, from_id='ID', to_id='SGD_ID')
id_map_uniprot2sgd.columns = ['uniprot', 'sgd']

# UniprotKB -> Entrez Gene ID (same UniprotKB query string as above)
query3 = query2
id_map_uniprot2ncbi = uniprot_id_mapping_service(query3, from_id='ID', to_id='P_ENTREZGENEID')
id_map_uniprot2ncbi.columns = ['uniprot', 'entrez']

# Join the three mappings on the shared 'uniprot' column
merged = (
    id_map_kegg2uniprot
    .merge(id_map_uniprot2sgd, on='uniprot')
    .merge(id_map_uniprot2ncbi, on='uniprot')
)

# Key column: the KEGG ID without its first four characters (the 'sce:' prefix)
merged['name'] = [kegg_id[4:] for kegg_id in merged['kegg']]
merged.head()
Out[25]:
In [26]:
# Now update existing node table with the data frame above.
# Rows are matched on 'name' on both sides: network_key_col names the key column in
# the Cytoscape table, data_key_col the key column in the DataFrame.
yeast1.update_node_table(merged, network_key_col='name', data_key_col='name')
# Check the table is actually updated
yeast1.get_node_table().head()
Out[26]:
In [27]:
# Delete columns
# 'kegg' was added by the table update in the previous cell.
yeast1.delete_node_table_column('kegg')
# Create columns
# All options spelled out: a mutable, non-list column of type Double.
yeast1.create_node_column(name='New Empty Double Column', data_type='Double', is_immutable=False, is_list=False)
# Default is String, mutable column.
yeast1.create_node_column(name='Empty String Col')
# Show the node table after the column changes.
yeast1.get_node_table().head()
Out[27]:
In [28]:
# Get all existing Visual Styles
# (`json` is already imported in the first cell, so it is not imported again here)
styles = cy.style.get_all()
print(json.dumps(styles, indent=4))
# Create a new style
style1 = cy.style.create('sample_style1')
# Get a reference to the existing style
default_style = cy.style.create('default')
print(style1.get_name())
print(default_style.get_name())
# Get all available Visual Properties
print(len(cy.style.vps.get_all()))
# Get Visual Properties for each data type
node_vps = cy.style.vps.get_node_visual_props()
edge_vps = cy.style.vps.get_edge_visual_props()
network_vps = cy.style.vps.get_network_visual_props()
# Only a sample of the edge properties is printed.
print(pd.Series(edge_vps).head())
In [29]:
# Prepare key-value pair for Style defaults
# Keys are Visual Property names; values are the defaults to set on style1.
new_defaults = {
# Node defaults
'NODE_FILL_COLOR': '#eeeeff',
'NODE_SIZE': 20,
'NODE_BORDER_WIDTH': 0,
'NODE_TRANSPARENCY': 120,
'NODE_LABEL_COLOR': 'white',
# Edge defaults
'EDGE_WIDTH': 3,
'EDGE_STROKE_UNSELECTED_PAINT': '#aaaaaa',
'EDGE_LINE_TYPE': 'LONG_DASH',
'EDGE_TRANSPARENCY': 120,
# Network defaults
'NETWORK_BACKGROUND_PAINT': 'black'
}
# Update
style1.update_defaults(new_defaults)
# Apply the new style
# (to the yeast network created from the SIF DataFrame)
cy.style.apply(style1, yeast1)
In [30]:
# Passthrough mapping: node label is taken directly from the 'name' column
style1.create_passthrough_mapping(column='name', col_type='String', vp='NODE_LABEL')

# Discrete mapping: one paint per value of the 'interaction' column
kv_pair = {'pp': 'pink', 'pd': 'green'}
style1.create_discrete_mapping(
    column='interaction',
    col_type='String',
    vp='EDGE_STROKE_UNSELECTED_PAINT',
    mappings=kv_pair)

# Continuous mapping: two control points, each using one color for its
# 'lesser', 'equal' and 'greater' entries
points = [
    {'value': point_value, 'lesser': color, 'equal': color, 'greater': color}
    for point_value, color in (('1.0', 'white'), ('20.0', 'green'))
]

# Both continuous mappings below read the same column with the same column type.
degree_column = {'column': 'Degree', 'col_type': 'Double'}

minimal_style = cy.style.create('Minimal')
minimal_style.create_continuous_mapping(vp='NODE_FILL_COLOR', points=points, **degree_column)

# Or let the utility generate the control points for a simple slope
simple_slope = StyleUtil.create_slope(min=1, max=20, values=(10, 60))
minimal_style.create_continuous_mapping(vp='NODE_SIZE', points=simple_slope, **degree_column)

# Apply the new style
cy.style.apply(minimal_style, scale_free100)
In [31]:
# Get list of available layout algorithms
layouts = cy.layout.get_all()
# Pretty-print the result as indented JSON.
print(json.dumps(layouts, indent=4))
In [32]:
# Apply layout
cy.layout.apply(name='circular', network=yeast1)
# NOTE(review): the return value of get_views() is discarded here - confirm whether
# this call is still needed.
yeast1.get_views()
# The first view is indexed like a nested dict below.
yeast_view1 = yeast1.get_first_view()
node_views = yeast_view1['elements']['nodes']
# Tabulate the node view entries for display.
df3 = pd.DataFrame(node_views)
df3.head()
Out[32]:
In [33]:
from py2cytoscape.cytoscapejs import viewer as cyjs
cy.layout.apply(network=scale_free100)
view1 = scale_free100.get_first_view()
view2 = yeast1.get_first_view()
# print(view1)
cyjs.render(view2, 'default2', background='#efefef')
In [34]:
# Use Cytoscape.js style JSON
cyjs_style = cy.style.get(minimal_style.get_name(), data_format='cytoscapejs')
cyjs.render(view1, style=cyjs_style['style'], background='white')