In [1]:
import pandas as pd
# Load the blame log from its CSV export into a DataFrame (path is relative
# to the notebook's working directory).
log = pd.read_csv(filepath_or_buffer="../dataset/linux_blame_log.csv")
# Preview the first five rows to check that the file parsed as expected.
log.head(5)
Out[1]:
In [2]:
# Print a concise summary of the frame (columns, dtypes, non-null counts,
# memory usage) to sanity-check the load before transforming anything.
log.info()
In [3]:
# Count how many rows of the log each author accounts for; value_counts()
# sorts descending, so the first ten entries are the ten most frequent authors.
author_row_counts = log["author"].value_counts()
top10 = author_row_counts.iloc[:10]
top10
Out[3]:
In [4]:
%matplotlib inline
top10.plot.pie(figsize=[5,5],
title="Top 10 Wissensträger",
label="");
In [5]:
# Replace the raw timestamp column with parsed pandas datetimes so that
# date arithmetic is possible in the following cells.
# NOTE(review): relies on pd.to_datetime's default parsing/unit for this
# column — confirm it matches how the CSV stores timestamps.
log["timestamp"] = pd.to_datetime(log["timestamp"])
log.head(5)
Out[5]:
In [6]:
# Age of every row = elapsed time between "now" and its timestamp.
# NOTE(review): the reference point is the moment this cell runs, so the
# values (though not their relative order) change from run to run.
reference_time = pd.Timestamp('today')
log['age'] = reference_time - log['timestamp']
log.head(5)
Out[6]:
In [7]:
# Derive a coarse "component" key from each file path: keep the first two
# path segments and join them with a colon (e.g. "a/b/c.c" -> "a:b").
log['component'] = (
    log['path']
    .str.split("/")
    .str[:2]
    .str.join(":")
)
log.head(5)
Out[7]:
In [8]:
# For each component take the smallest age (i.e. its most recent entry in
# the log), then order components from most recently touched to least.
age_per_component = (
    log.groupby(['component'])['age']
    .min()
    .sort_values()
)
age_per_component.head(5)
Out[8]:
In [9]:
# The series is sorted ascending by age, so the last ten entries are the
# components whose most recent entry is the oldest.
age_per_component.iloc[-10:]
Out[9]:
In [10]:
# Bar chart of the minimum age per component, in the sorted order computed
# above; the trailing semicolon suppresses the Axes repr in the cell output.
age_per_component.plot(kind="bar", figsize=(15, 5));