In [10]:
import json
import pandas as pd
from matplotlib import pylab as plt

.загружаем файлы .json


In [11]:
path = 'task_data/Sessions_Page.json'
path2 = 'task_data/Goal1CompletionLocation_Goal1Completions.json'

with open(path, 'r') as f:
    sessions_page = json.loads(f.read())

with open(path2, 'r') as f:
    goals_page = json.loads(f.read())

Смотрим, где именно в файле интересующие нас данные


In [12]:
type (sessions_page)
sessions_page.keys()
sessions_page['reports'][0].keys()
sessions_page['reports'][0]['data']['rows']


Out[12]:
[{'dimensions': ['/'], 'metrics': [{'values': ['5925']}]},
 {'dimensions': ['/?_hsenc=p2ANqtz-_McBb_ag9eyaYD3mJjDG7G_uB3uhgGcsBxi15N7Dkx2CB6Q4EicvyM4qxz_QhoX49gt27HZU9461DeUyoMURHz597MYA&_hsmi=42202727'],
  'metrics': [{'values': ['1']}]},
 {'dimensions': ['/?_hsenc=p2ANqtz-_ORc__Ui5VKSGvLO72LiNTFW98hg-mQlkIsltDFF4bZnKtFg4qvG21fhsL8zvY5rcQaRGke58VTsOsZTHhcAw7YWQMDvVnC8flaNVmXtZ3Pt9_sRI&_hsmi=45155107'],
  'metrics': [{'values': ['1']}]},
 {'dimensions': ['/?_hsenc=p2ANqtz-_PMnaFvC1MTCPb_RghCCNcPyb6VHPUfs1AGacdBEc6k4WIkLbn_u24M5CG820anvWE4-jQa-2c2IEUNpl9Xqln0NruPA&_hsmi=42202727'],
  'metrics': [{'values': ['1']}]},
 {'dimensions': ['/?_hsenc=p2ANqtz--3SZ9_TxAeV_6dUflCeBmtEy9p5-kx7gJidd5Ibv-WOsLnzj_uCiZtdhRikZqTRif43CkxFcza6g657KSMlorVQiMBEg&_hsmi=45145424'],
  'metrics': [{'values': ['1']}]},
 {'dimensions': ['/?_hsenc=p2ANqtz--5qSlXatRwmauQbFfrahRUIfDCf0Sr03obAlY44VJRH5EId-A3BprJhTJXbEJ4KaVk0DPxxpp6r6sXB9RVDtgZeDJXeQ&_hsmi=45155107'],
  'metrics': [{'values': ['1']}]},
 {'dimensions': ['/?_hsenc=p2ANqtz--7jiNooHo7FlMd6hWQJ9lQ4YUTCEpBhXLKmoZXZwT8ceD_LFsoPxX3lxNtufY9-q6EQvsyGq0nstD-mq_LHIX3sUSUirBYOEP3ZIhOQ1zQkts8s2Y&_hsmi=45145424'],
  'metrics': [{'values': ['1']}]},
 {'dimensions': ['/?_hsenc=p2ANqtz-8cXEz7dOC7R8Rn0Z1ursKJbEJXbIM9-J_DmmhjhMfNNtV-x7SIG0PTSiax05Wb5NQQs5lXV8J7VXQLkLFMROPILH01fw&_hsmi=45155107'],
  'metrics': [{'values': ['1']}]},
 {'dimensions': ['/?_hsenc=p2ANqtz-8v6U0mczdHFIUhMyXIDy3wUfa-ZUxjJnQpdswsm8xGtqURMrq6SsUZDJ2Pd6JY9RHOrME0FHf2BZQX_GbQisqcaifxHg&_hsmi=45155107'],
  'metrics': [{'values': ['1']}]},
 {'dimensions': ['/?_hsenc=p2ANqtz-9L_dXES2vJOjXvDV1KF1GweVEyHvneXaMmu_-a73CHsTcA8sBZ4nfsMSJZgE4BRwvWHdW3kCGFTordfdwC-_7cRgJbsQ&_hsmi=45145424'],
  'metrics': [{'values': ['1']}]},
 {'dimensions': ['/?_hsenc=p2ANqtz-9MN7HnoJ9kpuvsMGtK1ocWm95ICOHZYyOj8Y3vGUdV_ZqlQEgCQD9acCrN2_Ynz3dKDWr6kFJ0r9fYlxjyZCqr95UmLe0x5RBJKnbRobUgIsOz_4Y&_hsmi=42202727'],
  'metrics': [{'values': ['1']}]},
 {'dimensions': ['/apage'], 'metrics': [{'values': ['1']}]},
 {'dimensions': ['/api/v1/page/html?roomHash=611fc30b6efe07edea8c301d0590d6eef9bd2c0f6feffa7f932bceb75c2d8e98&variation=A&responsiveMode=null'],
  'metrics': [{'values': ['3']}]},
 {'dimensions': ['/BYTE'], 'metrics': [{'values': ['5']}]},
 {'dimensions': ['/coffeawrds'], 'metrics': [{'values': ['1']}]},
 {'dimensions': ['/coffee'], 'metrics': [{'values': ['1']}]},
 {'dimensions': ['/coffeeaward'], 'metrics': [{'values': ['1']}]},
 {'dimensions': ['/CoffeeAwards'], 'metrics': [{'values': ['13']}]},
 {'dimensions': ['/coffeeawards.php'], 'metrics': [{'values': ['2']}]},
 {'dimensions': ['/coffeeswards'], 'metrics': [{'values': ['1']}]},
 {'dimensions': ['/cro'], 'metrics': [{'values': ['4']}]},
 {'dimensions': ['/en_us/article/google-wins-legal-battle-against-pro-trump-spammer-over-the-letter-g'],
  'metrics': [{'values': ['1']}]},
 {'dimensions': ['/f'], 'metrics': [{'values': ['1']}]},
 {'dimensions': ['/f?_hsenc=p2ANqtz-_1UCYA4kBE9MhrKrjFFnG8wJi39lmmN--gjM_kcklm1Wehl4RGXV8ndu6_z3npbKyvClqbxJURi7vETChCZ8Z7SPrnvg&_hsmi=42202779'],
  'metrics': [{'values': ['1']}]},
 {'dimensions': ['/f?_hsenc=p2ANqtz-_7G__6Fu-hdAW_QDTCGsWHLFw-s0J0G0BaIsIu3wOkys-vre6bnNYsOAXda6F7PZke4Wne7ZB0cAJmMj7C1FeAIavg7-7ZnIMUPMoLgQLVyU45R8A&_hsmi=42202779'],
  'metrics': [{'values': ['1']}]},
 {'dimensions': ['/f?_hsenc=p2ANqtz-_KHuFCrySSUj6owG5YovjZ72Lm9Q3loiH70CIUdUaLNKavgM3dcBXLeom4Wn047Lck984ngSuBqsZTiT2M5xBoHNrWKBX9sO-8O9aztiWt-jYC_Cw&_hsmi=42202779'],
  'metrics': [{'values': ['1']}]},
 {'dimensions': ['/f?_hsenc=p2ANqtz-_kpjrIf3w91oSb6OZ1ffFcixc8rD38wVBF9OUaZO6xLw97wlKt-G0j14I3smruA5LgrCrSg1eJtUSoy3fjXZDzADkpTehFhS2kQ8v4-aOsq-88x5s&_hsmi=42202779'],
  'metrics': [{'values': ['1']}]},
 {'dimensions': ['/f?_hsenc=p2ANqtz--EaTY5uMaCCENlHdZrqjWU87gsqIjB931oqY63YO16lzxdVOsHWdh9vwDC4oKLDbjgef1oqp7P7oiiU9mztVv9ZIQ06DeFoE83DnzAeng8ryrEvV0&_hsmi=42202779'],
  'metrics': [{'values': ['1']}]},
 {'dimensions': ['/f?_hsenc=p2ANqtz--Fmzrt-is6YUkQDiyPm58tfNZI4Owhaobos17zqlKhCvvgmolMpZ4T9i8uN0diAotHwLyVzrLXpvdQNOyfZO7HalvQdA&_hsmi=42202779'],
  'metrics': [{'values': ['1']}]},
 {'dimensions': ['/f?_hsenc=p2ANqtz--r-v1KNdidC5MRXxUbpNvKNUPZHrmc1BBM2L07bdEh9x0qmMaXCmyKhLW4JaqkJXug5MBWb-Vq4U4pRX6g8RAjEaJuIt-7fIihR3cytFlVY2N0acc&_hsmi=42202779'],
  'metrics': [{'values': ['2']}]},
 {'dimensions': ['/f?_hsenc=p2ANqtz--yWxlJiqKw6qLxHn9HnAliavqXd5zkUGepQhAqizxaAzN34LOn8rEQ0Dm5RtSJzVZ30QYk4orMN3lyMgbM6JJfCewqvQYIiODjjfu47iKD67c2iic&_hsmi=42202779'],
  'metrics': [{'values': ['1']}]},
 {'dimensions': ['/f?_hsenc=p2ANqtz-86i5FI8qBJdD_t64nguhhjhOov-hf1IPpyV26yrEtWbMFya66lMUD4AWTt61AeXGWaM234bmzZanig122PbTm_mr_unQ&_hsmi=42202779'],
  'metrics': [{'values': ['1']}]},
 {'dimensions': ['/f?_hsenc=p2ANqtz-8n7M1UT4lKodEVHxtg_xPPAdosXCNKndXQRJ7vOEQbqhudVf-EMsbbw8tvEPHAwppxkH_nKKGtpVuyTgWln7tsfqyIcg&_hsmi=42202779'],
  'metrics': [{'values': ['1']}]},
 {'dimensions': ['/f?_hsenc=p2ANqtz-8r8DJGe2dOKVOvqLA0P41_zTaNrnqIAroB1PvSAZkNWC0JxdDd-hm02aOhBwaGjITXCtBfY_ni_t2Yei_hyrOBtAuLpg&_hsmi=42202779'],
  'metrics': [{'values': ['1']}]},
 {'dimensions': ['/f?_hsenc=p2ANqtz-9-ml0xW_uInx74E0sCS8Dr_QFbO2ioMoL-g5ysT4V5Q2V0iZdQvFCwNkGH9JdmAgVbABac-4RZMJdEwVWV9pZvxJjnWg&_hsmi=42202779'],
  'metrics': [{'values': ['1']}]},
 {'dimensions': ['/f?_hsenc=p2ANqtz-94T5q6sCVIPbizZXpy8nGenRB9lqiLfHeBfNuwGsVW6sUU9VPQWiV32BxAMODssrtK2xsx9R-nRuBXB9HlAolFUKMYFA&_hsmi=42202779'],
  'metrics': [{'values': ['1']}]},
 {'dimensions': ['/f?_hsenc=p2ANqtz-9NREL2hMY7qh4lQdA6lronzFISEUO9Q2OAvaL8P4wuFsYoTNpmRdyHVFysIJ2LQS5lr8EyFa9G3eL3zm9HtWZo27Vn-g&_hsmi=42202779'],
  'metrics': [{'values': ['1']}]},
 {'dimensions': ['/f?_hsenc=p2ANqtz-9op2ESwofMQMdqtCf_fL_UleMMStplEPtbDlamlXdXiIukiY7nYICCEo3xOW_ibAL1X76gYPFXAkNPBD-Z7FEQ6ULjW2PBHlXeoCLt10B2O5TnDK0&_hsmi=42202779'],
  'metrics': [{'values': ['1']}]},
 {'dimensions': ['/free-web-scan.php'], 'metrics': [{'values': ['5']}]},
 {'dimensions': ['/media/OurFabMembers/'], 'metrics': [{'values': ['1']}]},
 {'dimensions': ['/media/rushes/'], 'metrics': [{'values': ['1']}]},
 {'dimensions': ['/media/rushes/thumbnails/'], 'metrics': [{'values': ['1']}]},
 {'dimensions': ['/ouramazingteam'], 'metrics': [{'values': ['1']}]},
 {'dimensions': ['/ourfabmembers'], 'metrics': [{'values': ['1']}]},
 {'dimensions': ['/portfolio-single-gallery.html'],
  'metrics': [{'values': ['2']}]},
 {'dimensions': ['/portfolio-single.html'], 'metrics': [{'values': ['1']}]},
 {'dimensions': ['/printer'], 'metrics': [{'values': ['1']}]},
 {'dimensions': ['/search?q=cache:9Se28Bsen5YJ:tmrw.co/+&cd=1&hl=en&ct=clnk&gl=uk'],
  'metrics': [{'values': ['1']}]},
 {'dimensions': ['/sharebutton.to'], 'metrics': [{'values': ['10']}]},
 {'dimensions': ['/staging/'], 'metrics': [{'values': ['5']}]},
 {'dimensions': ['/staging/2. live site/TMRW_Byte_Cafe.php'],
  'metrics': [{'values': ['1']}]},
 {'dimensions': ['/strike'], 'metrics': [{'values': ['1']}]},
 {'dimensions': ['/subscribe.php'], 'metrics': [{'values': ['1']}]},
 {'dimensions': ['/subscriube'], 'metrics': [{'values': ['1']}]},
 {'dimensions': ['/tmrw_byte_cafe.php'], 'metrics': [{'values': ['1']}]},
 {'dimensions': ['/TMRW_Byte_Cafe.php'], 'metrics': [{'values': ['270']}]},
 {'dimensions': ['/TMRW_Byte_Cafe.php Croydon'],
  'metrics': [{'values': ['1']}]},
 {'dimensions': ['/tmrw_faqs.php'], 'metrics': [{'values': ['1']}]},
 {'dimensions': ['/TMRW_FAQs.php'], 'metrics': [{'values': ['129']}]},
 {'dimensions': ['/TMRW_Get_in_touch'], 'metrics': [{'values': ['1']}]},
 {'dimensions': ['/TMRW_Get_in_touch.php'], 'metrics': [{'values': ['4']}]},
 {'dimensions': ['/TMRW_Images/TMRW_Gallery_Images/TMRW_Offices/private_office_in-coworking_space_TMRW_croydon_004.jpg'],
  'metrics': [{'values': ['2']}]},
 {'dimensions': ['/TMRW_Images/TMRW_Gallery_Images/TMRW_Offices/private-offices_in_coworking_space_tmrw_croydon_007.jpg'],
  'metrics': [{'values': ['1']}]},
 {'dimensions': ['/TMRW_subscribe.php'], 'metrics': [{'values': ['2']}]},
 {'dimensions': ['/tmrw_the_team.php'], 'metrics': [{'values': ['1']}]},
 {'dimensions': ['/TMRW_the_team.php'], 'metrics': [{'values': ['140']}]},
 {'dimensions': ['/TMRW_trainstrike_form.php'],
  'metrics': [{'values': ['3']}]},
 {'dimensions': ['/TMRWpeople1.php'], 'metrics': [{'values': ['2']}]},
 {'dimensions': ['/trainstrike'], 'metrics': [{'values': ['2']}]},
 {'dimensions': ['/trainstrike_form.php'], 'metrics': [{'values': ['1']}]},
 {'dimensions': ['/trainstrikes.php'], 'metrics': [{'values': ['231']}]},
 {'dimensions': ['/trainstrikes.php?inf_contact_key=37d743fb0e8703a972f25e1bf88ba24e9c1670a04e6fad4bbf1dfc744b2373a7'],
  'metrics': [{'values': ['1']}]},
 {'dimensions': ['/trainstrikes.php?inf_contact_key=dd4202a772c196c27ca487b9d299fb4bc052e92cacf83cdfee0b5da024cd5b55'],
  'metrics': [{'values': ['1']}]},
 {'dimensions': ['/voteforbyte.php'], 'metrics': [{'values': ['79']}]},
 {'dimensions': ['/WeWork/Guardian1.jpg'], 'metrics': [{'values': ['1']}]}]

Считываем нужные нам данные как датафреймы


In [13]:
sessions_df = pd.DataFrame(sessions_page['reports'][0]['data']['rows'])
goals_df = pd.DataFrame(goals_page['reports'][0]['data']['rows'])
#sessions_df
goals_df


Out[13]:
dimensions metrics
0 [(entrance)] [{'values': ['6']}]
1 [/] [{'values': ['85']}]
2 [/portfolio-single-gallery.html] [{'values': ['1']}]
3 [/TMRW_Byte_Cafe.php] [{'values': ['22']}]
4 [/TMRW_FAQs.php] [{'values': ['24']}]
5 [/TMRW_Get_in_touch.php] [{'values': ['3']}]
6 [/TMRW_the_team.php] [{'values': ['5']}]

Создаем в датафреймах отдельные столбцы с данными в удобных нам форматах.


In [14]:
x=[]
for i in sessions_df.dimensions:
    x.append(str(i[0]))

sessions_df.insert(2, 'name', x)

x=[]
for i in goals_df.dimensions:
    x.append(str(i[0]))
    
goals_df.insert(2, 'name', x)

In [15]:
x=[]
for i in sessions_df.metrics:
    x.append(float(i[0]['values'][0]))

sessions_df.insert(3, 'sessions', x)

x=[]
for i in goals_df.metrics:
    x.append(float(i[0]['values'][0]))

goals_df.insert(3, 'goals', x)

Создаем столбцы в датафрейме с "Goal1Complitations", где будем хранить количество сессий и конверсию


In [16]:
goals_df.insert(4, 'sessions', 0)
goals_df.insert(5, 'convers_rate', 0)

Переносим из таблицы сессий количество сессий и считаем конверсию для каждой страницы, которая есть в "Goal1Complitations"


In [17]:
for i in range(7):
    goals_df.sessions[i] = sum(sessions_df.sessions[sessions_df.name==goals_df.name[i]])
goals_df.convers_rate = goals_df.goals/goals_df.sessions*100


C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\__main__.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app

Обнулим конверсию для тех страниц по которым не було сессий. В даннос случае это страница "(entrance)"


In [18]:
goals_df.convers_rate[goals_df.sessions==0] = 0


C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':

In [19]:
goals_df.ix[range(1,7),[2,5]]


Out[19]:
name convers_rate
1 / 1.434599
2 /portfolio-single-gallery.html 50.000000
3 /TMRW_Byte_Cafe.php 8.148148
4 /TMRW_FAQs.php 18.604651
5 /TMRW_Get_in_touch.php 75.000000
6 /TMRW_the_team.php 3.571429

Строим график


In [20]:
goals_df.ix[range(1,7),[2,5]].plot(kind="bar", legend=False)
plt.xticks([0, 1, 2, 3, 4, 5], goals_df.name, rotation="vertical")
plt.show()


Выводим результат


In [22]:
name = goals_df.ix[goals_df.convers_rate==max(goals_df.convers_rate),2]
print ('The best converting page on your site is "',str(name)[5:len(name)-28], '" with conversion rate', max(goals_df.convers_rate),'%')


The best converting page on your site is " /TMRW_Get_in_touch.ph " with conversion rate 75.0 %

In [ ]: