You can use this example API request as a starting point for building your own API queries. Note that the Legacy Pagecounts API has a slightly different schema from the Pageviews API shown here.
This sample API request would get you all pageviews by web crawlers on the mobile website for English Wikipedia during the month of September, 2017.
Scrape Pageview Mobile Site traffic (current api)
In [99]:
#current
import requests
endpoint = 'https://wikimedia.org/api/rest_v1/metrics/pageviews/aggregate/{project}/{access}/{agent}/{granularity}/{start}/{end}'
# Identify the requester to the Wikimedia API (their etiquette asks for contact info).
headers={'User-Agent' : 'https://github.com/your_github_username', 'From' : 'abhiv@uw.edu'}
params = {'project' : 'en.wikipedia.org',
'access' : 'mobile-web',
'agent' : 'user',
'granularity' : 'monthly',
'start' : '2015070100',
'end' : '2017091000'#NOTE(review): intent was "first day of the following month" for a full month of data, but 2017091000 is 2017-09-10 — confirm the intended end date
}
# BUG FIX: headers was defined but never sent with the request; pass it explicitly.
api_call = requests.get(endpoint.format(**params), headers=headers)
pageview_mobile_site = api_call.json()
print(pageview_mobile_site)
Scrape Pageview Mobile App traffic (current api)
In [98]:
#current
import requests
endpoint = 'https://wikimedia.org/api/rest_v1/metrics/pageviews/aggregate/{project}/{access}/{agent}/{granularity}/{start}/{end}'
# Identify the requester to the Wikimedia API (their etiquette asks for contact info).
headers={'User-Agent' : 'https://github.com/your_github_username', 'From' : 'abhiv@uw.edu'}
params = {'project' : 'en.wikipedia.org',
'access' : 'mobile-app',
'agent' : 'user',
'granularity' : 'monthly',
'start' : '2015070100',
'end' : '2017091000'#NOTE(review): intent was "first day of the following month" for a full month of data, but 2017091000 is 2017-09-10 — confirm the intended end date
}
# BUG FIX: headers was defined but never sent with the request; pass it explicitly.
api_call = requests.get(endpoint.format(**params), headers=headers)
pageview_mobile_app = api_call.json()
print(pageview_mobile_app)
Scrape Pageview Desktop Site traffic (current api)
In [82]:
#current
import requests
endpoint = 'https://wikimedia.org/api/rest_v1/metrics/pageviews/aggregate/{project}/{access}/{agent}/{granularity}/{start}/{end}'
# Identify the requester to the Wikimedia API (their etiquette asks for contact info).
headers={'User-Agent' : 'https://github.com/your_github_username', 'From' : 'abhiv@uw.edu'}
params = {'project' : 'en.wikipedia.org',
'access' : 'desktop',
'agent' : 'user',
'granularity' : 'monthly',
'start' : '2015070100',
'end' : '2017091000'#NOTE(review): intent was "first day of the following month" for a full month of data, but 2017091000 is 2017-09-10 — confirm the intended end date
}
# BUG FIX: headers was defined but never sent with the request; pass it explicitly.
api_call = requests.get(endpoint.format(**params), headers=headers)
pageview_desktop_site = api_call.json()
print(pageview_desktop_site)
Scrape Pageview All Site traffic (current api)
In [81]:
#current
import requests
endpoint = 'https://wikimedia.org/api/rest_v1/metrics/pageviews/aggregate/{project}/{access}/{agent}/{granularity}/{start}/{end}'
# Identify the requester to the Wikimedia API (their etiquette asks for contact info).
headers={'User-Agent' : 'https://github.com/your_github_username', 'From' : 'abhiv@uw.edu'}
params = {'project' : 'en.wikipedia.org',
'access' : 'all-access',
'agent' : 'user',
'granularity' : 'monthly',
'start' : '2015070100',
'end' : '2017091000'#NOTE(review): intent was "first day of the following month" for a full month of data, but 2017091000 is 2017-09-10 — confirm the intended end date
}
# BUG FIX: headers was defined but never sent with the request; pass it explicitly.
api_call = requests.get(endpoint.format(**params), headers=headers)
pageview_all_site = api_call.json()
print(pageview_all_site)
Scrape Pagecount desktop Site traffic (legacy api)
In [62]:
#Legacy
import requests
endpoint = 'https://wikimedia.org/api/rest_v1/metrics/legacy/pagecounts/aggregate/{project}/{access}/{granularity}/{start}/{end}'
# Identify the requester to the Wikimedia API (their etiquette asks for contact info).
headers={'User-Agent' : 'https://github.com/your_github_username', 'From' : 'abhiv@uw.edu'}
params = {'project' : 'en.wikipedia.org',
'access' : 'desktop-site',
'granularity' : 'monthly',
'start' : '2008010100',
'end' : '2016071000'#NOTE(review): intent was "first day of the following month" for a full month of data, but 2016071000 is 2016-07-10 — confirm the intended end date
}
# BUG FIX: headers was defined but never sent with the request; pass it explicitly.
api_call = requests.get(endpoint.format(**params), headers=headers)
pagecounts_desktop_site = api_call.json()
print(pagecounts_desktop_site)
Scrape Pagecount all Site traffic (legacy api)
In [89]:
#Legacy
import requests
endpoint = 'https://wikimedia.org/api/rest_v1/metrics/legacy/pagecounts/aggregate/{project}/{access}/{granularity}/{start}/{end}'
# Identify the requester to the Wikimedia API (their etiquette asks for contact info).
headers={'User-Agent' : 'https://github.com/your_github_username', 'From' : 'abhiv@uw.edu'}
params = {'project' : 'en.wikipedia.org',
'access' : 'all-sites',
'granularity' : 'monthly',
'start' : '2008010100',
'end' : '2016071000'#NOTE(review): intent was "first day of the following month" for a full month of data, but 2016071000 is 2016-07-10 — confirm the intended end date
}
# BUG FIX: headers was defined but never sent with the request; pass it explicitly.
api_call = requests.get(endpoint.format(**params), headers=headers)
pagecounts_all_sites = api_call.json()
print(pagecounts_all_sites)
Scrape Pagecount mobile Site traffic (legacy api)
In [63]:
#Legacy
import requests
endpoint = 'https://wikimedia.org/api/rest_v1/metrics/legacy/pagecounts/aggregate/{project}/{access}/{granularity}/{start}/{end}'
# Identify the requester to the Wikimedia API (their etiquette asks for contact info).
headers={'User-Agent' : 'https://github.com/your_github_username', 'From' : 'abhiv@uw.edu'}
params = {'project' : 'en.wikipedia.org',
'access' : 'mobile-site',
'granularity' : 'monthly',
'start' : '2008010100',
'end' : '2016071000'#NOTE(review): intent was "first day of the following month" for a full month of data, but 2016071000 is 2016-07-10 — confirm the intended end date
}
# BUG FIX: headers was defined but never sent with the request; pass it explicitly.
api_call = requests.get(endpoint.format(**params), headers=headers)
pagecounts_mobile_site = api_call.json()
print(pagecounts_mobile_site)
In [29]:
# NOTE(review): 'countresponse' is not defined anywhere in this file —
# presumably created in an earlier notebook cell; confirm before re-running.
countresponse['items']
Out[29]:
In [37]:
# Display the 'count' field of the first returned item.
# NOTE(review): 'countresponse' is not defined in this file — confirm its source cell.
countresponse['items'][0]['count']
Out[37]:
In [47]:
# Display the 'views' field of the fourth returned item (index 3).
# NOTE(review): 'viewresponse' is not defined in this file — confirm its source cell.
viewresponse['items'][3]['views']
Out[47]:
In [70]:
# Display the timestamp of the fourth item (index 3) from the mobile-site pageview response.
pageview_mobile_site['items'][3]['timestamp']
Out[70]:
Build a dictionary of the relevant information for each response, using the timestamp as the key and the view/page count as the value. Repeat for every dataset.
In [75]:
#pageview_mobile_site
# Map each month's timestamp -> view count. Iterate the items directly
# instead of keeping a manual index counter alongside an unused loop variable.
dict_pageview_mobile_site = {item['timestamp']: item['views']
                             for item in pageview_mobile_site['items']}
In [ ]:
In [76]:
# Display the timestamp -> views mapping for the mobile site.
dict_pageview_mobile_site
Out[76]:
In [77]:
#pageview_desktop_site
# Map each month's timestamp -> view count. Iterate the items directly
# instead of keeping a manual index counter alongside an unused loop variable.
dict_pageview_desktop_site = {item['timestamp']: item['views']
                              for item in pageview_desktop_site['items']}
In [78]:
# Display the timestamp -> views mapping for the desktop site.
dict_pageview_desktop_site
Out[78]:
In [83]:
#pageview_all_site
# Map each month's timestamp -> view count. Iterate the items directly
# instead of keeping a manual index counter alongside an unused loop variable.
dict_pageview_all_site = {item['timestamp']: item['views']
                          for item in pageview_all_site['items']}
In [85]:
# Display the timestamp -> views mapping for all access methods.
dict_pageview_all_site
Out[85]:
In [92]:
#pagecount_all_site
# Map each month's timestamp -> legacy page count. Iterate the items directly
# instead of keeping a manual index counter alongside an unused loop variable.
dict_pagecounts_all_sites = {item['timestamp']: item['count']
                             for item in pagecounts_all_sites['items']}
In [ ]:
In [93]:
# Display the timestamp -> count mapping for all sites (legacy API).
dict_pagecounts_all_sites
Out[93]:
In [95]:
# Map each month's timestamp -> legacy desktop page count. Iterate the items
# directly instead of keeping a manual index counter alongside the loop variable.
dict_pagecounts_desktop_site = {item['timestamp']: item['count']
                                for item in pagecounts_desktop_site['items']}
In [96]:
# Map each month's timestamp -> legacy mobile page count. Iterate the items
# directly instead of keeping a manual index counter alongside the loop variable.
dict_pagecounts_mobile_site = {item['timestamp']: item['count']
                               for item in pagecounts_mobile_site['items']}
In [101]:
# Map each month's timestamp -> mobile-app view count. Iterate the items
# directly instead of keeping a manual index counter alongside the loop variable.
dict_pageview_mobile_app = {item['timestamp']: item['views']
                            for item in pageview_mobile_app['items']}
In [105]:
# Total mobile pageviews per month = app views + site views, keyed by timestamp.
# zip() makes the parallel-lists assumption explicit instead of indexing the
# site list with a counter derived from iterating the app list (which would
# raise IndexError on a length mismatch).
# NOTE(review): assumes both responses cover the same months in the same order
# (both were requested with identical start/end dates) — verify before reuse.
dict_pageview_mobile = {}
for app_item, site_item in zip(pageview_mobile_app['items'], pageview_mobile_site['items']):
    dict_pageview_mobile[app_item['timestamp']] = app_item['views'] + site_item['views']
In [112]: