In [53]:
%matplotlib inline
import numpy as np
import scipy as sp
import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import pandas as pd
# Notebook-wide pandas display settings, collected in one table and applied
# in a single pass.
PANDAS_DISPLAY_OPTIONS = {
    'display.width': 500,
    'display.max_columns': 100,
    'display.notebook_repr_html': True,
}
for option_key, option_value in PANDAS_DISPLAY_OPTIONS.items():
    pd.set_option(option_key, option_value)
import seaborn as sns #sets up styles and gives us more plotting options
In [54]:
# Load the per-page flow export. The three column names below are assigned
# positionally, so the CSV must contain exactly three columns
# (assumed order: page, sessions, conversions -- TODO confirm against the file).
input_flow = pd.read_csv('data/TMRW_flow.csv')
# rename columns
input_flow.columns = ['Page', 'Sessions', 'Conversions']
# Keep only pages with at least one conversion. `.copy()` detaches the result
# from the unfiltered frame, so adding columns to it later does not raise
# SettingWithCopyWarning.
input_flow = input_flow[input_flow.Conversions > 0].copy()
# Same rows, indexed by page name (set_index returns a new frame).
input_flow_index = input_flow.set_index('Page')
input_flow
Out[54]:
In [55]:
# Conversion rate per page: conversions divided by sessions, scaled to percent.
input_flow['CR'] = input_flow['Conversions'].div(input_flow['Sessions']).mul(100)
input_flow
Out[55]:
In [56]:
# Plotting view of the data: every column except the raw session counts.
# `columns=` replaces the positional axis argument (`drop('Sessions', 1)`),
# which was deprecated in pandas 1.3 and is rejected by pandas 2.x.
d = input_flow.drop(columns='Sessions')
d
Out[56]:
In [58]:
# Frame re-indexed by conversion rate. No longer needed for the lookup below;
# kept because the name lives at notebook scope and may be used elsewhere.
input_flow_cr = input_flow.set_index('CR')
# Series.max() skips NaN, unlike the Python builtin max().
max_CR = input_flow.CR.max()
# idxmax() returns the row label of the first maximum, so best_page is always
# a single page name -- looking the float value up in a CR-valued index would
# return a Series whenever several pages share the top rate.
best_page = input_flow.loc[input_flow.CR.idxmax(), 'Page']
best_page
Out[58]:
In [59]:
# Bubble chart of conversion rate against conversions, one labelled point per
# page. Uses np, cm and plt from the notebook's top import cell.
page = d['Page']
conversions = d['Conversions']
cr = d['CR']
N = len(d)  # number of plotted pages

# Marker areas carried over from the original cell. np.resize tiles/truncates
# the list so scatter() always receives exactly one size per point.
marker_sizes = np.resize([1625, 1752, 1629, 2255, 1630], N)

# Seeded generator: random-looking rainbow colours that are identical on
# every run of the notebook.
colors = cm.rainbow(np.random.RandomState(0).rand(N))

fig, ax = plt.subplots()
ax.scatter(conversions, cr, s=marker_sizes, color=colors)
# Iterate over values rather than integer labels: the row index may have gaps
# after the Conversions > 0 filter, in which case page[i] raises KeyError.
for label, x, y in zip(page, conversions, cr):
    ax.annotate(label, xy=(x, y))
ax.set_xlabel('Conversions')
ax.set_ylabel('CR')
# Move title up with the "y" option
ax.set_title('USER FLOW', y=1.05)
plt.show()
In [ ]:
In [ ]: