pySankey

Source

https://github.com/Pierre-Sassoulas/pySankey https://github.com/anazalea/pySankey

Installed using pip:

(base) D:\dataanalysis-herocoli-redmetrics>pip install pySankey

Collecting pySankey

Downloading https://files.pythonhosted.org/packages/ad/fc/c2823db63e0efe3652759a4de2859ed02fe5479bb1ba720605cd789121e5/pySankey-0.0.1-py3-none-any.whl

distributed 1.21.8 requires msgpack, which is not installed.

Installing collected packages: pySankey

Successfully installed pySankey-0.0.1

You are using pip version 10.0.1, however version 18.0 is available.

You should consider upgrading via the 'python -m pip install --upgrade pip' command.


In [ ]:
import matplotlib
#matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pySankey import sankey

Sort on left labels and right labels

With the current implementation of pySankey, it is impossible to guarantee that the left labels and the right labels will be sorted. Left and right labels are instantiated on the fly, and their positions are not rolled back if a label that should sort lower is drawn afterwards.

If 0 -> 1 is drawn before 1 -> 0, right 1 will be below right 0.

If 0 -> 1 is drawn after 1 -> 0, left 1 will be below left 0.

Working with a modified version here.


In [ ]:
# One row per flow: (left label, right label, weight).
# The number in parentheses in each label equals the summed weight of the
# flows that touch that label on its side of the diagram.
flows = [
    ('0 (7)',  '0 (10)',  2.),
    ('1 (55)', '1 (3)',   3.),
    ('0 (7)',  '2 (3)',   1.),
    ('0 (7)',  '3 (73)',  4.),
    ('1 (55)', '0 (10)',  7.),
    ('1 (55)', '3 (73)', 45.),
    ('2 (1)',  '3 (73)',  1.),
    ('3 (26)', '0 (10)',  1.),
    ('3 (26)', '2 (3)',   2.),
    ('3 (26)', '3 (73)', 23.),
]

# Split the table back into the parallel lists that sankey.sankey expects.
left, right, leftWeight = (list(column) for column in zip(*flows))
# Both sides carry the same weight for every flow.
rightWeight = list(leftWeight)

sankey.sankey(
    left=left,
    right=right,
    leftWeight=leftWeight,
    rightWeight=rightWeight,
    aspect=20,
    fontsize=20,
    figureName="testSankey"
)

In [ ]:
from pySankey import sankey

In [ ]:
# One row per flow: (left label, right label, weight).
# Here the first flow listed goes from left "1" to right "0".
flows = [
    ('1 (55)', '0 (8)',   7.),
    ('0 (7)',  '1 (5)',   2.),
    ('1 (55)', '1 (5)',   3.),
    ('0 (7)',  '2 (3)',   1.),
    ('0 (7)',  '3 (73)',  4.),
    ('1 (55)', '3 (73)', 45.),
    ('2 (1)',  '3 (73)',  1.),
    ('3 (26)', '0 (8)',   1.),
    ('3 (26)', '2 (3)',   2.),
    ('3 (26)', '3 (73)', 23.),
]

# Split the table back into the parallel lists that sankey.sankey expects.
left, right, leftWeight = (list(column) for column in zip(*flows))
# Both sides carry the same weight for every flow.
rightWeight = list(leftWeight)

# NOTE(review): closePlot is not explained anywhere in this notebook;
# presumably it controls whether the figure is closed after drawing - confirm
# against the modified sankey module.
sankey.sankey(
    left=left,
    right=right,
    leftWeight=leftWeight,
    rightWeight=rightWeight,
    aspect=20,
    fontsize=20,
    figureName="testSankey",
    closePlot=False,
)

In [ ]:
# One row per flow: (left label, right label, weight).
# Here the first flow listed goes from left "0" to right "1".
flows = [
    ('0 (7)',  '1 (5)',   2.),
    ('1 (55)', '1 (5)',   3.),
    ('0 (7)',  '2 (3)',   1.),
    ('0 (7)',  '3 (73)',  4.),
    ('1 (55)', '0 (8)',   7.),
    ('1 (55)', '3 (73)', 45.),
    ('2 (1)',  '3 (73)',  1.),
    ('3 (26)', '0 (8)',   1.),
    ('3 (26)', '2 (3)',   2.),
    ('3 (26)', '3 (73)', 23.),
]

# Split the table back into the parallel lists that sankey.sankey expects.
left, right, leftWeight = (list(column) for column in zip(*flows))
# Both sides carry the same weight for every flow.
rightWeight = list(leftWeight)

# NOTE(review): closePlot is not explained anywhere in this notebook;
# presumably it controls whether the figure is closed after drawing - confirm
# against the modified sankey module.
sankey.sankey(
    left=left,
    right=right,
    leftWeight=leftWeight,
    rightWeight=rightWeight,
    aspect=20,
    fontsize=20,
    figureName="testSankey",
    closePlot=True,
)

In [ ]:
# Column names are supplied explicitly, so the first line of the file is read
# as an ordinary row (index 0).
# NOTE(review): the [1:] slicing below assumes that first line is the file's
# own header - confirm against customers-goods.csv.
columnNames = ['id', 'customer', 'good', 'revenue']
df = pd.read_csv('customers-goods.csv', sep=',', names=columnNames)

# Everything after the first row.
body = df.iloc[1:]

# Right side uses the revenue as-is; left side deliberately uses twice that,
# so the two sides of the diagram receive different weights.
rightWeight = body['revenue'].values.astype(float)
leftWeight = 2 * rightWeight

sankey.sankey(
    left=body['customer'].values,
    right=body['good'].values,
    leftWeight=leftWeight,
    rightWeight=rightWeight,
    aspect=20,
    fontsize=20,
    figureName=None,
    closePlot=True,
)

In [ ]:
# Four subjects covering every pretest -> posttest combination of
# 'good' / 'bad', one combination per row.
classesDF = pd.DataFrame(
    {
        'pretest':  ['good', 'good', 'bad', 'bad'],
        'posttest': ['good', 'bad', 'good', 'bad'],
    },
    index=['1', '2', '3', '4'],
)

# Every row gets the same weight (10.0) on both sides.
weight = np.full(len(classesDF.index), 10.0)

sankey.sankey(
    left=classesDF['pretest'].values,
    right=classesDF['posttest'].values,
    leftWeight=weight,
    rightWeight=weight,
    aspect=20,
    fontsize=20,
    figureName=None,
    closePlot=False,
)

In [ ]:
# Debug check: length of the weight vector next to the number of rows in df.
# When the notebook is run top to bottom, rightWeight was built from
# df['revenue'].values[1:], so it is one element shorter than df.
len(rightWeight), len(df)

In [ ]:
# Debug display of the weight array built alongside classesDF
# (one 10.0 entry per row of classesDF when cells are run top to bottom).
weight

In [ ]:
# Print the backend names exposed by matplotlib.rcsetup.
# NOTE(review): rcsetup.all_backends is reported as deprecated in recent
# Matplotlib releases in favour of the backend registry - confirm before
# upgrading matplotlib.
from matplotlib import rcsetup
print(rcsetup.all_backends)

In [ ]:
import pySankey

# Show what the installed package exposes and where it is loaded from
# (useful for checking which copy of pySankey is actually being imported).
for detail in (dir(pySankey), pySankey.__path__):
    print(detail)

In [ ]:
import pandas as pd
from pySankey import sankey

# Keep DataFrame displays short for the rest of the session.
pd.set_option('display.max_rows', 8)

# fruits.txt: space-separated pairs, read as the columns 'true' and 'predicted'.
df = pd.read_csv('fruits.txt', sep=' ', names=['true', 'predicted'])

# Fixed colour per fruit label.
colorDict = dict(
    apple='#f71b1b',
    blueberry='#1b7ef7',
    banana='#f3f71b',
    lime='#12e23f',
    orange='#f78c1b',
)

sankey.sankey(
    df['true'],
    df['predicted'],
    aspect=20,
    colorDict=colorDict,
    fontsize=12,
    figureName=None,
)
# NOTE(review): figureName is None here, so presumably no "fruit.png" is
# written and the diagram is only shown inline - pass figureName="fruit" if the
# PNG is wanted; confirm against the sankey implementation in use.

In [ ]:
# Number of rows per distinct label in the 'true' column.
df['true'].value_counts()

In [ ]:
# Number of rows per distinct label in the 'predicted' column.
df['predicted'].value_counts()

In [ ]:
# Load the file with explicit column names, then in one chain:
#   - drop row 0 (NOTE(review): assumes it holds the file's own header line -
#     confirm against customers-goods.csv),
#   - move 'id' from a column to the index,
#   - convert 'revenue' to float.
df = (
    pd.read_csv(
        'customers-goods.csv', sep=',',
        names=['id', 'customer', 'good', 'revenue']
    )
    .drop(0)
    .set_index('id')
    .assign(revenue=lambda frame: frame['revenue'].apply(float))
)

# Only the right-hand weights are supplied in this call.
sankey.sankey(
    left=df['customer'],
    right=df['good'],
    rightWeight=df['revenue'],
    aspect=20,
    fontsize=20,
    figureName=None,
)
# NOTE(review): figureName is None here, so presumably no "customer-good.png"
# is written and the diagram is only shown inline - pass
# figureName="customer-good" if the PNG is wanted; confirm against the sankey
# implementation in use.