In [53]:
%matplotlib inline 

import numpy as np
import scipy as sp
import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import pandas as pd

# Display configuration: 500-character width, up to 100 columns shown,
# and HTML rendering of frames enabled in the notebook.
pd.set_option(
    'display.width', 500,
    'display.max_columns', 100,
    'display.notebook_repr_html', True,
)
import seaborn as sns #sets up styles and gives us more plotting options

In [54]:
# Load the page-flow data from CSV.
input_flow = pd.read_csv('data/TMRW_flow.csv')

# rename columns (assumes the CSV has exactly these three columns, in this order)
input_flow.columns = ['Page', 'Sessions', 'Conversions']

# keep only pages with at least one conversion; .copy() makes the result an
# independent frame so adding columns to it later does not raise
# SettingWithCopyWarning
input_flow = input_flow[input_flow.Conversions > 0].copy()

# same rows indexed by page (a re-index, not a group-by)
input_flow_index = input_flow.set_index('Page')
input_flow


Out[54]:
Page Sessions Conversions
0 '/ 4436 82
1 '/TMRW_FAQs.php 100 26
2 '/TMRW_Byte_Cafe.php 218 23
3 '/TMRW_the_team.php 99 10

In [55]:
# Conversion rate as a percentage of sessions. `assign` returns a new frame
# instead of writing a column into a filtered slice, so it does not trigger
# SettingWithCopyWarning and the cell can be re-run safely.
input_flow = input_flow.assign(
    CR=input_flow.Conversions / input_flow.Sessions * 100
)
input_flow


Out[55]:
Page Sessions Conversions CR
0 '/ 4436 82 1.848512
1 '/TMRW_FAQs.php 100 26 26.000000
2 '/TMRW_Byte_Cafe.php 218 23 10.550459
3 '/TMRW_the_team.php 99 10 10.101010

In [56]:
# Frame without the Sessions column (used by the scatter plot below).
# `columns=` replaces the positional axis argument `drop('Sessions', 1)`,
# which was deprecated in pandas 1.3 and is rejected by pandas 2.0+.
d = input_flow.drop(columns='Sessions')
d


Out[56]:
Page Conversions CR
0 '/ 82 1.848512
1 '/TMRW_FAQs.php 26 26.000000
2 '/TMRW_Byte_Cafe.php 23 10.550459
3 '/TMRW_the_team.php 10 10.101010

In [58]:
# Same rows indexed by conversion rate.
input_flow_cr = input_flow.set_index('CR')

# Highest conversion rate; Series.max() skips NaN, unlike the builtin max().
max_CR = input_flow.CR.max()

# Page with the highest conversion rate. idxmax() gives the row label of the
# first maximum, so this is always a single value: looking the float up in a
# CR index would return a Series whenever two pages tie.
best_page = input_flow.loc[input_flow.CR.idxmax(), 'Page']
best_page


Out[58]:
"'/TMRW_FAQs.php"

In [59]:
# Bubble chart of conversions vs. conversion rate, one labelled bubble per page.
# Uses np, cm and plt as imported in the first cell of the notebook.

page = d['Page']
conversions = d['Conversions']
cr = d['CR']
n_pages = len(d)

# Bubble areas (points^2). The original list had five entries regardless of how
# many pages are plotted; np.resize repeats/truncates it to one entry per page,
# because scatter needs `s` to match the number of points.
# NOTE(review): these values look like leftovers from another plot -- confirm
# whether the size should encode a real quantity instead.
marker_sizes = np.resize([1625, 1752, 1629, 2255, 1630], n_pages)

# Evenly spaced colours from the rainbow colormap: distinct per page and
# identical on every run (the previous random draw was unseeded).
colors = cm.rainbow(np.linspace(0, 1, n_pages))

fig, ax = plt.subplots()
ax.scatter(conversions, cr, s=marker_sizes, color=colors)

# Iterate over values rather than indexing with page[i]: after filtering, the
# row labels of `d` are not guaranteed to be 0..n-1.
for label, x, y in zip(page, conversions, cr):
    ax.annotate(label, xy=(x, y))

ax.set_xlabel('Conversions')
ax.set_ylabel('CR')

# Move title up with the "y" option
ax.set_title('USER FLOW', y=1.05)
plt.show()



In [ ]:


In [ ]: