In [ ]:
# bot-vs-bot revert table: https://quarry.wmflabs.org/query/17237
!wget https://quarry.wmflabs.org/run/161084/output/0/tsv?download=true -O botvbot.tsv
In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
# Read the tab-separated file written by the download cell into a DataFrame.
df = pd.read_csv("botvbot.tsv", delimiter="\t")
# Show the number of rows that were loaded.
df.shape[0]
Out[2]:
In [3]:
# List the column labels of the loaded table to see which fields are available.
print(df.columns)
In [4]:
# Peek at the first five rows of the table.
df.head(5)
Out[4]:
In [5]:
# Parse the compact YYYYMMDDHHMMSS timestamps into real datetimes, stored in a
# new column so the raw value stays available.
df['reverting_timestamp_dt'] = pd.to_datetime(
    df.reverting_timestamp,
    format="%Y%m%d%H%M%S",
)
In [6]:
# Index the table by the parsed revert timestamp so it can be grouped by time.
# The guard makes the cell safe to re-run: once the column has been moved into
# the index, a second set_index() on the same name would raise KeyError.
if df.index.name != 'reverting_timestamp_dt':
    df = df.set_index('reverting_timestamp_dt')
In [7]:
# How many rows fall in each page namespace, most frequent first.
df['page_namespace'].value_counts()
Out[7]:
In [8]:
# Group rows by calendar day of the (datetime) index and by page namespace.
# pd.TimeGrouper was deprecated and later removed from pandas;
# pd.Grouper(freq=...) is the supported equivalent and takes the same
# frequency / closed arguments.
gp = df.groupby([pd.Grouper(freq='1D', closed='left'), 'page_namespace'])
In [9]:
# Count non-null rev_id values per (day, namespace) group
# (presumably one row per revert -- confirm against the query).
daily_namespace_g = gp['rev_id'].count()
# Preview the first few (day, namespace) counts.
daily_namespace_g.head(5)
Out[9]:
In [10]:
# Namespace ids to keep as columns in the per-period tables and plots
# (presumably MediaWiki namespace numbers, e.g. 0 = articles -- TODO confirm mapping).
namespaces = [0, 1, 2, 3, 4, 5, 6, 10, 11, 14]
In [11]:
# Pivot the namespace index level into columns (one row per day), keep only the
# selected namespaces, and preview the result.
daily_namespace_g.unstack().loc[:, namespaces].head(5)
Out[11]:
In [12]:
# Wide daily table: rows are days, columns are the selected namespaces.
daily_namespace_unstack = daily_namespace_g.unstack().loc[:, namespaces]
In [29]:
# Daily counts, one area panel per namespace, each with its own x and y axis
# on a linear scale.
daily_namespace_unstack[namespaces].plot.area(
    subplots=True,
    figsize=[12, 34],
    sharex=False,
    sharey=False,
    logy=False,
)
Out[29]:
In [14]:
# Daily counts again as line panels (the default plot kind), this time with a
# logarithmic y axis per namespace.
daily_namespace_unstack[namespaces].plot.line(
    subplots=True,
    figsize=[12, 34],
    sharex=False,
    sharey=False,
    logy=True,
)
Out[14]:
In [15]:
# All selected namespaces on a single set of axes, one thin line each.
daily_namespace_unstack[namespaces].plot.line(
    figsize=[14, 12],
    linewidth=1,
)
Out[15]:
In [16]:
# Group rows by week of the (datetime) index and by page namespace.
# pd.TimeGrouper was deprecated and later removed from pandas;
# pd.Grouper(freq=...) is the supported equivalent.
# closed='left' stays explicit: per the pandas docs, weekly frequencies default
# to closed='right', so dropping it would shift the bin edges.
gp2 = df.groupby([pd.Grouper(freq='1W', closed='left'), 'page_namespace'])
In [17]:
# Count non-null rev_id values per (week, namespace) group.
weekly_namespace_g = gp2['rev_id'].count()
# Preview the first few (week, namespace) counts.
weekly_namespace_g.head(5)
Out[17]:
In [18]:
# Namespace ids to keep for the weekly tables and plots; the same list the
# daily section uses, set again here.
namespaces = [0, 1, 2, 3, 4, 5, 6, 10, 11, 14]
In [19]:
# Pivot the namespace index level into columns (one row per week), keep only the
# selected namespaces, and preview the result.
weekly_namespace_g.unstack().loc[:, namespaces].head(5)
Out[19]:
In [20]:
# Wide weekly table: rows are weeks, columns are the selected namespaces.
weekly_namespace_unstack = weekly_namespace_g.unstack().loc[:, namespaces]
In [24]:
# Weekly counts, one area panel per namespace, each with its own x and y axis
# on a linear scale.
weekly_namespace_unstack[namespaces].plot.area(
    subplots=True,
    figsize=[12, 34],
    sharex=False,
    sharey=False,
    logy=False,
)
Out[24]:
In [26]:
# Weekly counts as line panels with a logarithmic y axis per namespace.
weekly_namespace_unstack[namespaces].plot.line(
    subplots=True,
    figsize=[12, 34],
    sharex=False,
    sharey=False,
    logy=True,
)
Out[26]:
In [28]:
# Weekly counts for all selected namespaces stacked into one area chart.
# `stacked` takes a boolean. The previous string 'true' only worked because any
# non-empty string is truthy, so 'false' would have stacked the areas as well.
weekly_namespace_unstack[namespaces].plot(
    kind='area',
    stacked=True,
    figsize=[14, 12],
    linewidth=2,
)
Out[28]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: