In [1]:
import pandas as pd

blame = pd.read_csv("../dataset/linux_blame_log.csv")
blame.head()


Out[1]:
path author timestamp line
0 drivers/scsi/bfa/bfad_drv.h Anil Gurumurthy 1448528085000000000 1
1 drivers/scsi/bfa/bfad_drv.h Anil Gurumurthy 1448528085000000000 2
2 drivers/scsi/bfa/bfad_drv.h Anil Gurumurthy 1448528085000000000 3
3 drivers/scsi/bfa/bfad_drv.h Jing Huang 1253753175000000000 4
4 drivers/scsi/bfa/bfad_drv.h Anil Gurumurthy 1448528085000000000 5

In [2]:
blame.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5665947 entries, 0 to 5665946
Data columns (total 4 columns):
path         object
author       object
timestamp    int64
line         int64
dtypes: int64(2), object(2)
memory usage: 172.9+ MB

In [3]:
top10 = blame.author.value_counts().head(10)
top10


Out[3]:
Linus Torvalds           838200
Hans Verkuil             118432
Mauro Carvalho Chehab    102107
Michael Chan              53945
Mike Marciniszyn          44843
Ralph Campbell            42453
Nicholas Bellinger        41823
Laurent Pinchart          40438
Antti Palosaari           40390
Alexander Duyck           39307
Name: author, dtype: int64

In [4]:
%matplotlib inline
top10.plot.pie()


Out[4]:
<matplotlib.axes._subplots.AxesSubplot at 0x1ce80847f60>

In [5]:
blame.timestamp = pd.to_datetime(blame.timestamp)
blame.head()


Out[5]:
path author timestamp line
0 drivers/scsi/bfa/bfad_drv.h Anil Gurumurthy 2015-11-26 08:54:45 1
1 drivers/scsi/bfa/bfad_drv.h Anil Gurumurthy 2015-11-26 08:54:45 2
2 drivers/scsi/bfa/bfad_drv.h Anil Gurumurthy 2015-11-26 08:54:45 3
3 drivers/scsi/bfa/bfad_drv.h Jing Huang 2009-09-24 00:46:15 4
4 drivers/scsi/bfa/bfad_drv.h Anil Gurumurthy 2015-11-26 08:54:45 5

In [6]:
blame['age'] = pd.Timestamp('today') - blame.timestamp
blame.head()


Out[6]:
path author timestamp line age
0 drivers/scsi/bfa/bfad_drv.h Anil Gurumurthy 2015-11-26 08:54:45 1 1036 days 22:35:22.839129
1 drivers/scsi/bfa/bfad_drv.h Anil Gurumurthy 2015-11-26 08:54:45 2 1036 days 22:35:22.839129
2 drivers/scsi/bfa/bfad_drv.h Anil Gurumurthy 2015-11-26 08:54:45 3 1036 days 22:35:22.839129
3 drivers/scsi/bfa/bfad_drv.h Jing Huang 2009-09-24 00:46:15 4 3291 days 06:43:52.839129
4 drivers/scsi/bfa/bfad_drv.h Anil Gurumurthy 2015-11-26 08:54:45 5 1036 days 22:35:22.839129

In [11]:
blame['components'] = blame.path.str.split("/").str[0:2].str.join(":")
blame.head()


Out[11]:
path author timestamp line age components
0 drivers/scsi/bfa/bfad_drv.h Anil Gurumurthy 2015-11-26 08:54:45 1 1036 days 22:35:22.839129 drivers:scsi
1 drivers/scsi/bfa/bfad_drv.h Anil Gurumurthy 2015-11-26 08:54:45 2 1036 days 22:35:22.839129 drivers:scsi
2 drivers/scsi/bfa/bfad_drv.h Anil Gurumurthy 2015-11-26 08:54:45 3 1036 days 22:35:22.839129 drivers:scsi
3 drivers/scsi/bfa/bfad_drv.h Jing Huang 2009-09-24 00:46:15 4 3291 days 06:43:52.839129 drivers:scsi
4 drivers/scsi/bfa/bfad_drv.h Anil Gurumurthy 2015-11-26 08:54:45 5 1036 days 22:35:22.839129 drivers:scsi

In [8]:
component_age = blame.groupby('components').age.min().sort_values()
component_age


Out[8]:
components
drivers:scsi           169 days 14:03:58.839129
drivers:i2c            169 days 15:24:33.839129
drivers:net            169 days 15:39:49.839129
drivers:of             170 days 07:58:51.839129
drivers:pci            170 days 11:45:46.839129
drivers:s390           171 days 17:11:30.839129
drivers:infiniband     174 days 06:54:24.839129
drivers:block          174 days 14:33:04.839129
drivers:perf           175 days 03:00:32.839129
drivers:cpufreq        175 days 04:46:18.839129
drivers:dax            175 days 08:05:39.839129
drivers:input          175 days 11:08:10.839129
drivers:media          175 days 21:38:52.839129
drivers:mmc            177 days 09:33:04.839129
drivers:thermal        178 days 20:33:42.839129
drivers:hwtracing      182 days 19:23:19.839129
drivers:rtc            183 days 12:16:02.839129
drivers:bluetooth      183 days 16:01:20.839129
drivers:hv             184 days 09:29:04.839129
drivers:rpmsg          184 days 10:23:24.839129
drivers:parisc         186 days 19:25:45.839129
drivers:tty            187 days 21:46:41.839129
drivers:reset          188 days 18:25:19.839129
drivers:bus            196 days 14:53:11.839129
drivers:ide            197 days 08:42:01.839129
drivers:thunderbolt    202 days 21:13:06.839129
drivers:ssb            204 days 01:06:43.839129
drivers:target         204 days 06:19:58.839129
drivers:w1             205 days 21:06:01.839129
drivers:clocksource    211 days 21:33:57.839129
                                 ...           
drivers:ras            330 days 17:22:10.839129
drivers:connector      343 days 00:06:18.839129
drivers:spmi           400 days 18:43:41.839129
drivers:hwspinlock     499 days 01:30:38.839129
arch:s390              624 days 21:22:28.839129
arch:arc               693 days 12:51:15.839129
include:linux          736 days 20:45:53.839129
arch:x86               744 days 16:24:27.839129
drivers:dca           1049 days 19:44:07.839129
include:media         1113 days 00:29:53.839129
drivers:coresight     1277 days 11:16:27.839129
drivers:base          1632 days 08:00:02.839129
sound:i2c             1962 days 11:35:24.839129
arch:mips             1996 days 10:01:31.839129
arch:sh               2005 days 18:32:19.839129
arch:powerpc          2007 days 20:34:52.839129
arch:sparc            2014 days 01:03:24.839129
drivers:sn            2106 days 08:15:23.839129
drivers:hwmon         2272 days 11:41:52.839129
drivers:xen           2719 days 15:55:12.839129
arch:ia64             2741 days 18:25:21.839129
arch:unicore32        2770 days 18:08:49.839129
drivers:char          2782 days 15:02:45.839129
drivers:serial        2807 days 00:10:14.839129
drivers:mfd           2830 days 09:07:35.839129
include:scsi          3871 days 08:16:31.839129
arch:i386             4009 days 00:05:35.839129
drivers:usb           4166 days 02:32:38.839129
include:asm-arm       4319 days 17:55:14.839129
arch:sparc64          4613 days 20:37:23.839129
Name: age, Length: 73, dtype: timedelta64[ns]

In [9]:
component_age.tail(10)


Out[9]:
components
arch:ia64         2741 days 18:25:21.839129
arch:unicore32    2770 days 18:08:49.839129
drivers:char      2782 days 15:02:45.839129
drivers:serial    2807 days 00:10:14.839129
drivers:mfd       2830 days 09:07:35.839129
include:scsi      3871 days 08:16:31.839129
arch:i386         4009 days 00:05:35.839129
drivers:usb       4166 days 02:32:38.839129
include:asm-arm   4319 days 17:55:14.839129
arch:sparc64      4613 days 20:37:23.839129
Name: age, dtype: timedelta64[ns]

In [10]:
component_age.plot.bar(figsize=(15,5))


Out[10]:
<matplotlib.axes._subplots.AxesSubplot at 0x1cf388ada90>