In [1]:
%matplotlib inline 

import numpy as np
import scipy as sp
import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import pandas as pd

pd.set_option('display.width', 500)
pd.set_option('display.max_columns', 100)
pd.set_option('display.notebook_repr_html', True)
import seaborn as sns #sets up styles and gives us more plotting options

In [2]:
# Time period 1st Jan - 30th April (arbitrary )

# API credentials
# Email address 705762800217-compute@developer.gserviceaccount.com
# Key IDs 948ee8e2a420ef14a5d5a29bd35104fe2f1e6ed4

In [3]:
# open file. It is requested via API explorer using request parameters:

#Account: Skein.co
#Property: Skein.co
#View: Skein.co - Report
#ids: ga:93735856
#start-date: 2017-02-01
#end-date: 2017-04-30

#metrics
#ga:goal3Completions



#dimensions
#ga:goalCompletionLocation

In [4]:
input_flow= pd.read_csv('skein_data/SKEIN_user-flow.csv')

# rename columns
input_flow.columns=['Page','Conversions']

# filter page with CR > 0
input_flow = input_flow[input_flow.Conversions > 0]

# group by page
input_flow_index = input_flow.set_index('Page')
input_flow


Out[4]:
Page Conversions
0 (entrance) 2
1 '/ 2
2 '/careers/ 2
3 '/case-studies/ 1
4 '/case-studies/travel-weekly/ 1
5 '/contact-us/ 2
6 '/jobs/junior-software-developer-e-mail-support/ 1

In [ ]:
# open file. It is requested via API explorer using request parameters:

#Account: Skein.co
#Property: Skein.co
#View: Skein.co - Report
#ids: ga:93735856
#start-date: 2017-02-01
#end-date: 2017-04-30

#metrics
#ga:sessions

#dimensions
#ga:pagePath

#filter
#ga:sessions>30

In [18]:
sessions = pd.read_csv('skein_data/SKEIN_user-flow_2.csv')

# rename columns
sessions.columns=['Page2','Sessions']

# filter page with CR > 0
sessions = sessions[sessions.Sessions > 0]

# group by page
sessions_index = sessions.set_index('Page2')
sessions


Out[18]:
Page2 Sessions
0 '/ 1415
1 '/2017-data-technology-trends/ 43
2 '/careers/ 317
3 '/case-studies/ 110
4 '/case-studies/toothscan-dental-app/ 33
5 '/google-liar-ru-spam-in-analytics/ 40
6 '/incubator-rus/ 36
7 '/news/google-g/ 33
8 '/what_we_do_lean_innovation/ 34

In [21]:
dif = sessions['Page2'] - input_flow['Page']
print (dif)


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\ops.py in na_op(x, y)
    651             result = expressions.evaluate(op, str_rep, x, y,
--> 652                                           raise_on_error=True, **eval_kwargs)
    653         except TypeError:

C:\ProgramData\Anaconda3\lib\site-packages\pandas\computation\expressions.py in evaluate(op, op_str, a, b, raise_on_error, use_numexpr, **eval_kwargs)
    209         return _evaluate(op, op_str, a, b, raise_on_error=raise_on_error,
--> 210                          **eval_kwargs)
    211     return _evaluate_standard(op, op_str, a, b, raise_on_error=raise_on_error)

C:\ProgramData\Anaconda3\lib\site-packages\pandas\computation\expressions.py in _evaluate_numexpr(op, op_str, a, b, raise_on_error, truediv, reversed, **eval_kwargs)
    120     if result is None:
--> 121         result = _evaluate_standard(op, op_str, a, b, raise_on_error)
    122 

C:\ProgramData\Anaconda3\lib\site-packages\pandas\computation\expressions.py in _evaluate_standard(op, op_str, a, b, raise_on_error, **eval_kwargs)
     62     with np.errstate(all='ignore'):
---> 63         return op(a, b)
     64 

TypeError: unsupported operand type(s) for -: 'str' and 'str'

During handling of the above exception, another exception occurred:

TypeError                                 Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\ops.py in safe_na_op(lvalues, rvalues)
    675             with np.errstate(all='ignore'):
--> 676                 return na_op(lvalues, rvalues)
    677         except Exception:

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\ops.py in na_op(x, y)
    657                 mask = notnull(x) & notnull(y)
--> 658                 result[mask] = op(x[mask], _values_from_object(y[mask]))
    659             elif isinstance(x, np.ndarray):

TypeError: unsupported operand type(s) for -: 'str' and 'str'

During handling of the above exception, another exception occurred:

TypeError                                 Traceback (most recent call last)
<ipython-input-21-e254442971fd> in <module>()
----> 1 dif = sessions['Page2'] - input_flow['Page']
      2 print (dif)

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\ops.py in wrapper(left, right, name, na_op)
    713                 lvalues = lvalues.values
    714 
--> 715         result = wrap_results(safe_na_op(lvalues, rvalues))
    716         return construct_result(
    717             left,

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\ops.py in safe_na_op(lvalues, rvalues)
    684                 if is_object_dtype(lvalues):
    685                     return _algos.arrmap_object(lvalues,
--> 686                                                 lambda x: op(x, rvalues))
    687             raise
    688 

pandas\src\algos_common_helper.pxi in pandas.algos.arrmap_object (pandas\algos.c:46681)()

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\ops.py in <lambda>(x)
    684                 if is_object_dtype(lvalues):
    685                     return _algos.arrmap_object(lvalues,
--> 686                                                 lambda x: op(x, rvalues))
    687             raise
    688 

TypeError: unsupported operand type(s) for -: 'str' and 'str'

In [ ]:


In [ ]:


In [5]:
input_flow['CR'] = input_flow.Conversions / input_flow.Sessions * 100
input_flow


Out[5]:
Page Sessions Conversions CR
0 '/ 1415 258 18.233216
1 '/2017-data-technology-trends/ 43 7 16.279070
2 '/careers/ 317 105 33.123028
3 '/case-studies/ 110 76 69.090909
4 '/case-studies/toothscan-dental-app/ 33 6 18.181818
5 '/google-liar-ru-spam-in-analytics/ 40 32 80.000000
6 '/incubator-rus/ 36 5 13.888889
7 '/news/google-g/ 33 31 93.939394
8 '/what_we_do_lean_innovation/ 34 55 161.764706

In [6]:
d= input_flow.drop('Sessions', 1)
d


Out[6]:
Page Conversions CR
0 '/ 258 18.233216
1 '/2017-data-technology-trends/ 7 16.279070
2 '/careers/ 105 33.123028
3 '/case-studies/ 76 69.090909
4 '/case-studies/toothscan-dental-app/ 6 18.181818
5 '/google-liar-ru-spam-in-analytics/ 32 80.000000
6 '/incubator-rus/ 5 13.888889
7 '/news/google-g/ 31 93.939394
8 '/what_we_do_lean_innovation/ 55 161.764706

In [7]:
input_flow_cr = input_flow.set_index('CR')

max_CR = max(input_flow.CR)
best_page = input_flow_cr.loc[max_CR,'Page']
best_page


Out[7]:
"'/what_we_do_lean_innovation/"

In [8]:
from bokeh.io import output_notebook
from bokeh.charts import Bar, Line, show
from bokeh.plotting import figure, output_file, show

output_notebook()
p = Bar(input_flow, 'Page', values='CR', title="The best converting page on your site is %s" % best_page)
p.legend.location = "top_right"
show(p)
print("Put an additional button \"Contact Us\" on the main page. (or adding the contact form link to the menu bar at the top of the homepage")


Loading BokehJS ...
Put an additional button "Contact Us" on the main page. (or adding the contact form link to the menu bar at the top of the homepage

In [9]:
df=pd.DataFrame(d)
total_rows=len(df.axes[0])
print(total_rows)


9

In [11]:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np

page=d['Page']
conversions = d['Conversions']
cr = d['CR']
injuries = [1625,1752,1629,2255,1630]

colors=cm.rainbow(np.random.rand(total_rows))

plt.scatter(conversions,cr,s=injuries,color=colors)
for i in range(total_rows):
    plt.annotate(page[i],xy=(conversions[i],cr[i]))
plt.xlabel('Conversions')
plt.ylabel('CR')

# Move title up with the "y" option
plt.title('USER FLOW',y=1.05)
plt.show()



In [ ]:


In [ ]:


In [ ]: