Quickstart: Graphing Les Misérables

This simple example from our quickstart walkthrough introduces the basics of PyGraphistry. We also have more advanced tutorials available.

You can download this notebook to run it locally.


In [1]:
import pandas
import graphistry

#graphistry.register(key='MY_API_KEY', server='labs.graphistry.com') #https://www.graphistry.com/api-request

In [18]:
# Load the edge list from CSV with Pandas
links_csv_path = '../../data/lesmiserables.csv'
links_df = pandas.read_csv(links_csv_path)

# Show three randomly chosen rows as a preview
links_df.sample(n=3)


Out[18]:
source target value
95 Pontmercy Thenardier 1
16 Isabeau Valjean 1
179 Bossuet Enjolras 10

In [19]:
# Declare which columns hold each edge's endpoints, then attach the edge table
edge_bindings = graphistry.bind(source='source', destination='target')
g = edge_bindings.edges(links_df)

# Upload & plot
g.plot()


Out[19]:

In [93]:
# New graph adding the number of encounters (the 'value' column) to edge labels.
g2 = g.bind(edge_title='value')

# `g2` is derived from `g`, which already has `links_df` attached as its edges,
# so the edge table is not passed again here (consistent with the other plots).
g2.plot()


Out[93]:

Size nodes by number of links and color them by community


In [94]:
def _endpoint_stats(edges_df, node_col, other_col):
    """Summarize, for every name in ``node_col``, the names it links to in ``other_col``.

    The nested-dict form ``.agg({'col': {'name': func}})`` was removed in
    pandas 1.0 ("nested renamer is not supported"), so the same two-level
    column layout is rebuilt explicitly here.

    Parameters
    ----------
    edges_df : pandas.DataFrame
        Edge table containing ``node_col`` and ``other_col``.
    node_col : str
        Column whose values become the 'node' index of the result.
    other_col : str
        Column aggregated per node.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'node', with columns ``(other_col, 'count')`` (number of
        non-null entries) and ``(other_col, 'community')`` (the smallest
        entry in sort order, used as the node's community label).
    """
    stats = (
        edges_df.rename(columns={node_col: 'node'})
        .groupby('node')[other_col]
        # 'min' picks the same value as sorted(x)[0] did in the original lambda
        .agg(['count', 'min'])
        .rename(columns={'min': 'community'})
    )
    # Restore the (other_col, <stat>) two-level header that downstream cells flatten.
    stats.columns = pandas.MultiIndex.from_product([[other_col], stats.columns])
    return stats


outgoing_df = _endpoint_stats(links_df, node_col='source', other_col='target')

incoming_df = _endpoint_stats(links_df, node_col='target', other_col='source')

incoming_df.sample(3)


Out[94]:
source
count community
node
Joly 2 Grantaire
Dahlia 2 Fantine
Judge 4 Brevet

In [95]:
# Combine both directions; a name that appears on only one side gets NaNs for the other.
merged_df = outgoing_df.join(incoming_df, how='outer').reset_index()
# Flatten the two-level header: ('target', 'count') -> 'targetcount', ('node', '') -> 'node'.
merged_df.columns = [''.join(label) for label in merged_df.columns.values]
# Missing counts become 0. Missing communities become the placeholder 'z', which
# compares greater than any name starting with an uppercase letter, so the real
# label wins the minimum below (assumes names are capitalized -- TODO confirm).
merged_df = merged_df.fillna(value={'sourcecount': 0, 'sourcecommunity': 'z', 'targetcount': 0, 'targetcommunity': 'z'})

source_community = merged_df['sourcecommunity']
target_community = merged_df['targetcommunity']

nodes_df = merged_df.assign(
    # Total number of links touching the node (column-wise add instead of row-wise apply).
    size=merged_df['targetcount'] + merged_df['sourcecount'],
    # Element-wise minimum of the two labels: keep the source label unless the target one is smaller.
    community=source_community.where(source_community <= target_community, target_community))

# Number communities in order of first appearance (same numbering as looking each
# label up in the list of unique values), then wrap into 12 color slots.
community_codes, community_labels = pandas.factorize(nodes_df['community'])
community = list(community_labels)  # kept: list of unique community labels
nodes_df['color'] = community_codes % 12

nodes_df = nodes_df[['node', 'size', 'color']]
nodes_df.sample(3)


Out[95]:
node size color
13 Child1 2.0 11
1 Babet 10.0 1
59 Montparnasse 9.0 9

In [96]:
# Attach the node table to the edge-labelled graph, then map its columns
# to the node identifier, color and size.
node_plotter = g2.nodes(nodes_df)
g3 = node_plotter.bind(node='node', point_color='color', point_size='size')

g3.plot()


Out[96]:

In [ ]: