Get & plot data from the first table in a SQL DB
Further docs
In [ ]:
import graphistry
#pip install graphistry -q
#graphistry.register(protocol='http', server='my.server.com', key='MY_API_KEY')
In [ ]:
user = "graphistry"
pwd = "password"
server = "postgres:5432"
##OPTIONAL: Mount in installation's ${PWD}/.notebooks/db_secrets.json and read in
#import json
#with open('/home/graphistry/notebooks/db_secrets.json') as json_file:
# cfg = json.load(json_file)
# user = cfg['user']
# pwd = cfg['pwd']
# server = cfg['server']
#
## .. The first time you run this notebook, save the secret cfg to the system's persistent notebook folder:
#import json
#with open('/home/graphistry/notebooks/db_secrets.json', 'w') as outfile:
# json.dump({
# "user": "graphistry",
# "pwd": "password",
# "server": "postgres:5432"
# }, outfile)
## Delete ^^^ after use
db_string = "postgres://" + user + ":" + pwd + "@" + server
### Take care not to save a print of the result
In [ ]:
# OPTIONAL: Install ODBC drivers in other environments:
# ! apt-get update
# ! apt-get install -y g++ unixodbc unixodbc-dev
# ! conda install -c anaconda pyodbc=4.0.26 sqlalchemy=1.3.5
In [ ]:
import pandas as pd
import sqlalchemy as db
from sqlalchemy import create_engine, MetaData, Table, Column, Integer, Float, String
from sqlalchemy.orm import sessionmaker
In [ ]:
engine = create_engine(db_string)
Session = sessionmaker(bind=engine)
session = Session()
In [ ]:
table_names = engine.table_names()
", ".join(table_names)
In [ ]:
if 'django_migrations' in table_names:
table = 'django_migrations'
else:
table = table_names[0]
In [ ]:
result = engine.execute("SELECT * FROM \"" + table + "\" LIMIT 1000")
df = pd.DataFrame(result.fetchall(), columns=result.keys())
print("table", table, '# rows', len(df))
df.sample(min(3, len(df)))
Several variants:
In [ ]:
graphistry.hypergraph(df)['graph'].plot()
In [ ]:
graphistry.hypergraph(df, direct=True)['graph'].plot()
In [ ]:
graphistry.hypergraph(df, direct=True,
opts={
'EDGES': {
'id': ['name'],
'applied': ['name'],
'name': ['app']
}
})['graph'].plot()
In [ ]:
g = graphistry.bind(source='name', destination='app').edges(df.assign(name=df['name'].apply(lambda x: 'id_' + x)))
g.plot()
In [ ]:
# Add node bindings..
nodes_df = pd.concat([
df[['name', 'id', 'applied']],
df[['app']].drop_duplicates().assign(\
id = df[['app']].drop_duplicates()['app'], \
name = df[['app']].drop_duplicates()['app'])
], ignore_index=True, sort=False)
g = g.bind(node='id', point_title='name').nodes(nodes_df)
g.plot()
In [ ]:
def explore(sql, *args, **kvargs):
result = engine.execute(sql)
df = pd.DataFrame(result.fetchall(), columns=result.keys())
print('# rows', len(df))
g = graphistry.hypergraph(df, *args, **kvargs)['graph']
return g
In [ ]:
explore("SELECT * FROM django_migrations LIMIT 1000").plot()
In [ ]:
explore("SELECT * FROM django_migrations LIMIT 1000", direct=True).plot()
In [ ]:
explore("SELECT * FROM django_migrations LIMIT 1000")._nodes
In [ ]: