In [5]:
# pandas is the only dependency of this analysis.
import pandas as pd
In [6]:
# Read the blame log CSV (path is relative to the notebook's directory)
# and preview the first rows.
blame_log_path = "../dataset/linux_blame_log.csv"
log = pd.read_csv(blame_log_path)
log.head()
Out[6]:
In [7]:
# Summarize the frame: column names, dtypes, non-null counts and memory use.
log.info()
In [10]:
# Count rows per author (value_counts sorts descending) and keep the
# ten most frequent authors.
rows_per_author = log["author"].value_counts()
top10 = rows_per_author.iloc[:10]
top10
Out[10]:
In [13]:
%matplotlib inline
top10.plot.pie();
In [15]:
# Convert the timestamp column to pandas datetimes so it supports
# date arithmetic, then preview the result.
log["timestamp"] = pd.to_datetime(log["timestamp"])
log.head()
Out[15]:
In [16]:
# Age of each row = time elapsed between its timestamp and now.
# NOTE: the reference point is the moment this cell runs, so the values
# change from run to run.
reference_time = pd.Timestamp('today')
log['age'] = reference_time - log['timestamp']
log.head()
Out[16]:
In [18]:
# Derive a component label from the first two path segments,
# joined with ":" (e.g. "a/b/c.c" -> "a:b").
path_segments = log['path'].str.split("/")
leading_segments = path_segments.str[:2]
log['component'] = leading_segments.str.join(":")
log.head()
Out[18]:
In [19]:
# For every component take the smallest age among its rows,
# then order the components from smallest to largest age.
min_age_by_component = log.groupby('component')['age'].min()
age_for_component = min_age_by_component.sort_values()
age_for_component.head()
Out[19]:
In [21]:
# Plot whole days instead of the raw timedelta values so the y-axis is in
# a human-readable unit, and label the chart so it stands on its own.
ax = age_for_component.dt.days.plot.bar(figsize=(15, 5))
# The trailing semicolon suppresses the repr of the returned list.
ax.set(title="Minimum age per component", xlabel="component", ylabel="age (days)");
Out[21]: