In [1]:
import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py
import datetime
from __future__ import division
from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client
%pylab inline
In [2]:
# We grab pings whose build ID falls between [TimeWindow + 2] days ago and
# 2 days ago, so TimeWindow is the length of that window in days.
TimeWindow = 14
# Additional filters, passed as keyword arguments to get_pings().
Filters = {
'app': 'Firefox',
# We sample 0.5% of pings. For testing, it is better to use a small number
# here (like 0.001) to speed up processing time.
'fraction': 0.005,
# Optionally restrict pings to a single channel.
# 'channel': 'beta',
}
# Graphics vendor IDs, compared against adapter['vendorID'] by the sample
# filters further down. The hex digits are lowercase. Note that 'AMD' and
# 'ATI' map to the same ID.
VendorIDs = {
'Intel': '0x8086',
'NVIDIA': '0x10de',
'AMD': '0x1002',
'ATI': '0x1002'
}
In [3]:
###############################
# This section gathers pings. #
###############################
def run_get_pings():
    """Fetch the raw pings for the configured build-date window.

    The window runs from (TimeWindow + 2) days ago up to 2 days ago. Both
    ends are formatted as YYYYMMDD strings and handed to get_pings() as the
    build_id range, together with everything in Filters.

    Returns whatever get_pings() returns for those parameters.
    """
    def fmt_date(d):
        # get_pings() is given the dates as compact YYYYMMDD strings.
        return d.strftime("%Y%m%d")

    # Read the clock once so both ends of the window are measured from the
    # same instant (two separate now() calls could straddle midnight).
    now = datetime.datetime.now()
    t1 = fmt_date(now - datetime.timedelta(days=TimeWindow + 2))  # window start
    t2 = fmt_date(now - datetime.timedelta(days=2))  # window end
    # NOTE(review): `sc` is not defined in this notebook; presumably it is the
    # Spark context injected by the hosting environment - confirm.
    return get_pings(sc, build_id=(t1, t2), **Filters)
# Get pings for the parameters (TimeWindow, Filters) set in the
# configuration cell above.
raw_pings = run_get_pings()
In [4]:
######################################################################
# This section takes the raw ping list, then formats and filters it. #
######################################################################
# Project each raw ping down to the handful of properties this analysis
# needs (giving a more readable dictionary-like form), then keep a single
# ping per client. To see what the result looks like, execute
# "unique_pings.take(1)".
unique_pings = get_one_ping_per_client(
    get_pings_properties(raw_pings, [
        "clientId",
        "environment/build/version",
        "environment/system/os/name",
        "environment/system/os/version",
        "environment/system/os/servicePackMajor",
        "environment/system/os/servicePackMinor",
        "environment/system/gfx/adapters",
    ])
)
In [5]:
# We add two extra steps. The first rewrites the ping to have some
# information more easily accessible (like the primary adapter),
# and the second step removes any pings that don't have adapter
# information.
def rewrite_ping(p):
    """Expose the primary graphics adapter of a ping under simpler keys.

    Adds p['adapter'] (the first entry of the adapter list) and, when the
    adapter reports a usable driver version, p['driverVersion'] as a list of
    integers (e.g. '8.15.10.2622' -> [8, 15, 10, 2622]).

    Args:
        p: dictionary-like ping; modified in place.

    Returns:
        The same ping, or None when it carries no adapter information.
    """
    adapters = p.get('environment/system/gfx/adapters', None)
    if not adapters:
        return None

    adapter = adapters[0]
    p['adapter'] = adapter

    # Use .get() so an adapter record without a 'driverVersion' key is
    # treated the same as one whose value is None, instead of raising.
    raw_version = adapter.get('driverVersion')
    if raw_version is not None:
        # Convert the version to a list of integers, dropping any
        # non-numeric components.
        parsed = [int(n) for n in raw_version.split('.') if n.isdigit()]
        # A version with no numeric parts at all (e.g. an empty string) is
        # left unset; otherwise it would later compare as version 0.
        if parsed:
            p['driverVersion'] = parsed
    return p
def filter_ping(p):
    """Return True only for pings that carry an 'adapter' entry.

    rewrite_ping() yields None for pings without adapter information, so
    None must be rejected explicitly: `'adapter' in None` raises TypeError.
    """
    return p is not None and 'adapter' in p
# Rewrite every ping, drop the ones without adapter information, and cache
# the result because the cells below query it repeatedly.
pings = unique_pings.map(rewrite_ping).filter(filter_ping).cache()
In [ ]:
# Observe the format of a single ping. This may take some time since it has to
# execute the pipeline.
pings.take(1)
In [6]:
# Count the total number of sessions in the dataset, i.e. the pings that
# survived the adapter filter above (one ping per client).
TotalSessions = pings.count()
print('Number of sessions: {0}'.format(TotalSessions))
In [7]:
##############################################
# Helper function to compare version tuples. #
##############################################
def compare_version_tuples(v1, v2):
    """Compare two version sequences component by component.

    The shorter sequence is padded with zeros, so (1,) and (1, 0) are equal.

    Args:
        v1, v2: sequences of integers (tuples or lists).

    Returns:
        A negative number if v1 < v2, a positive number if v1 > v2, and 0 if
        they are equal. The magnitude is the difference of the first
        components that differ.
    """
    n = max(len(v1), len(v2))
    # range() rather than xrange() so the helper runs on Python 2 and 3.
    for i in range(n):
        x1 = v1[i] if i < len(v1) else 0
        x2 = v2[i] if i < len(v2) else 0
        if x1 != x2:
            return x1 - x2
    return 0
# Tests
assert compare_version_tuples((1, 0), (1, 1)) < 0
assert compare_version_tuples((1, 1), (1, 0)) > 0
assert compare_version_tuples((1, 1), (1, 1)) == 0
# Zero-padding must work whichever argument is the shorter one.
assert compare_version_tuples((1,), (1, 0)) == 0
assert compare_version_tuples((1, 0), (1,)) == 0
assert compare_version_tuples((1, 0), (2, 5)) < 0
In [8]:
# Sample filter #1 - how many people are using Intel devices
# with a driver less than 8.15.10.2622? (bug 1175366).
BadVersion = (8, 15, 10, 2622)


def sample_filter_1(p):
    """Return True for Intel pings whose driver version is below BadVersion.

    Pings from other vendors, and pings with no parsed 'driverVersion',
    never match.
    """
    return (
        p['adapter']['vendorID'] == VendorIDs['Intel']
        and 'driverVersion' in p
        and compare_version_tuples(p['driverVersion'], BadVersion) < 0
    )
sample_result_1 = pings.filter(sample_filter_1)
# Evaluate each count exactly once and reuse it; the original called count()
# twice per dataset, repeating the work for the uncached filtered set.
matched_1 = sample_result_1.count()
total_1 = pings.count()
# Guard against an empty dataset so the report does not die on a division
# by zero; float() keeps this a true division regardless of the interpreter.
percent_1 = (float(matched_1) / total_1) * 100 if total_1 else 0.0
print('{0} out of {1} sessions matched. ({2:.2f}%)'.format(
    matched_1,
    total_1,
    percent_1))
In [9]:
# Sample filter #2 - how many users have either devices:
# 0x8086, 0x2e32 - Intel G41 express graphics
# 0x8086, 0x2a02 - Intel GM965, Intel X3100
# See bug 1116812.
#
# Note that vendor and deviceID hex digits are lowercase.
def sample_filter_2(p):
    """Return True for Intel pings whose device is one of the two listed IDs.

    The comparison is an exact string match against the lowercase hex IDs.
    """
    gfx = p['adapter']
    if gfx['vendorID'] != VendorIDs['Intel']:
        return False
    return gfx['deviceID'] in ('0x2e32', '0x2a02')
sample_result_2 = pings.filter(sample_filter_2)
# Evaluate each count exactly once and reuse it; the original called count()
# twice per dataset, repeating the work for the uncached filtered set.
matched_2 = sample_result_2.count()
total_2 = pings.count()
# Guard against an empty dataset so the report does not die on a division
# by zero; float() keeps this a true division regardless of the interpreter.
percent_2 = (float(matched_2) / total_2) * 100 if total_2 else 0.0
print('{0} out of {1} sessions matched. ({2:.2f}%)'.format(
    matched_2,
    total_2,
    percent_2))
In [ ]: