Quickstart: Graphing Les Misérables

This simple example from our quickstart walkthrough introduces the basics of PyGraphistry. We also have more advanced tutorials available.

You can download this notebook to run it locally.



In [1]:

    
import pandas
import graphistry

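# To connect to a Graphistry server, uncomment the line below and substitute your own API key
# (request one at https://www.graphistry.com/api-request):
#graphistry.register(key='MY_API_KEY', server='labs.graphistry.com')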



In [18]:

    
# Load the edge list from CSV into a DataFrame (path is relative to this notebook).
links_df = pandas.read_csv(
    '../../data/lesmiserables.csv',
)

# Show three randomly chosen rows as a quick sanity check of the columns.
links_df.sample(n=3)









    Out[18]:







  
    
      
      source
      target
      value
    
  
  
    
      95
      Pontmercy
      Thenardier
      1
    
    
      16
      Isabeau
      Valjean
      1
    
    
      179
      Bossuet
      Enjolras
      10



In [19]:

    
# Build the plotter: the 'source' and 'target' columns of links_df
# name the two endpoints of each edge.
g = (
    graphistry
    .bind(source='source', destination='target')
    .edges(links_df)
)

# Upload the graph and render the visualization as this cell's output.
g.plot()









    Out[19]:



In [93]:

    
# New graph adding the number of encounters to edge labels.
g2 = g.bind(edge_title='value')

# The edge table was already attached to `g` with .edges(links_df), and that
# binding carries over to g2, so plot() needs no argument here -- matching
# the other plot() calls in this notebook.
g2.plot()









    Out[93]:

Size and color nodes by community



In [94]:

    
def _degree_and_community(edges_df, node_col, neighbor_col):
    """Summarize each node's edges on one side of the edge list.

    Parameters
    ----------
    edges_df : pandas.DataFrame
        Edge list containing the columns ``node_col`` and ``neighbor_col``.
    node_col : str
        Column whose values are treated as the node being summarized.
    neighbor_col : str
        Column holding the node at the other end of each edge.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'node', with two-level columns
        ``(neighbor_col, 'count')`` -- number of edges -- and
        ``(neighbor_col, 'community')`` -- the smallest neighbor name in
        sort order, used by this notebook as a crude community label.
    """
    return (
        edges_df
        .rename(columns={node_col: 'node'})
        .groupby('node')[[neighbor_col]]
        # Nested-dict renaming inside .agg() was removed in pandas 1.0
        # (SpecificationError), so aggregate with a plain list of functions
        # and rename the inner column level afterwards.
        .agg(['count', 'min'])
        .rename(columns={'min': 'community'}, level=1)
    )


# Edges leaving each node / edges arriving at each node.
outgoing_df = _degree_and_community(links_df, 'source', 'target')
incoming_df = _degree_and_community(links_df, 'target', 'source')

incoming_df.sample(3)









    Out[94]:







  
    
      
      source
    
    
      
      count
      community
    
    
      node
      
      
    
  
  
    
      Joly
      2
      Grantaire
    
    
      Dahlia
      2
      Fantine
    
    
      Judge
      4
      Brevet



In [95]:

    
# Combine the outgoing and incoming summaries into one row per node; a node
# that appears on only one side of the edge list gets NaN for the other side.
merged_df = outgoing_df.join(incoming_df, how='outer').reset_index()

# Flatten the two-level column labels into single strings,
# e.g. ('target', 'count') -> 'targetcount' and ('node', '') -> 'node'.
merged_df.columns = [''.join(labels) for labels in merged_df.columns.values]

# Missing counts become 0. Missing communities become the placeholder 'z',
# which presumably sorts after every real (capitalized) character name so the
# min() below picks the real label -- verify if the data set changes.
merged_df = merged_df.fillna(value={'sourcecount': 0, 'sourcecommunity': 'z', 'targetcount': 0, 'targetcommunity': 'z'})


# size: total number of edges touching the node (column-wise sum instead of a
#       row-by-row apply).
# community: the smaller of the two per-side community labels.
nodes_df = merged_df.assign(
    size=merged_df['targetcount'] + merged_df['sourcecount'],
    community=[min(pair) for pair in zip(merged_df['sourcecommunity'],
                                         merged_df['targetcommunity'])])

# Give each distinct community a color code by its order of first appearance,
# wrapped into the range 0-11. A dict lookup replaces the linear list.index()
# scan that was previously performed for every row.
community = list(nodes_df['community'].unique())
color_by_community = {name: position % 12 for position, name in enumerate(community)}
nodes_df['color'] = nodes_df['community'].map(color_by_community)

# Keep only the columns that get bound to the graph.
nodes_df = nodes_df[['node', 'size', 'color']]
nodes_df.sample(3)









    Out[95]:







  
    
      
      node
      size
      color
    
  
  
    
      13
      Child1
      2.0
      11
    
    
      1
      Babet
      10.0
      1
    
    
      59
      Montparnasse
      9.0
      9



In [96]:

    
# Attach the node table and map its columns onto the graph:
# 'node' identifies each node, 'color' drives point color, 'size' drives point size.
g3 = (
    g2
    .nodes(nodes_df)
    .bind(node='node', point_color='color', point_size='size')
)

# Upload and render the styled graph.
g3.plot()









    Out[96]:



In [ ]: