In [13]:
import pandas as pd

data = pd.read_csv("../demos/dataset/linux_blame_log.csv")
data['author'] = pd.Categorical(data.author)
data.head()


Out[13]:
path author timestamp line
0 drivers/scsi/bfa/bfad_drv.h Anil Gurumurthy 1448528085000000000 1
1 drivers/scsi/bfa/bfad_drv.h Anil Gurumurthy 1448528085000000000 2
2 drivers/scsi/bfa/bfad_drv.h Anil Gurumurthy 1448528085000000000 3
3 drivers/scsi/bfa/bfad_drv.h Jing Huang 1253753175000000000 4
4 drivers/scsi/bfa/bfad_drv.h Anil Gurumurthy 1448528085000000000 5

In [14]:
grouped_files = data.groupby(['path', 'author']).line.count()
grouped_files.head(10)


Out[14]:
path                                author             
arch/arc/kernel/time.c              Anna-Maria Gleixner     13
                                    Daniel Lezcano          31
                                    Noam Camus              18
                                    Vineet Gupta           243
                                    Viresh Kumar             6
arch/arm/common/timer-sp.c          Linus Walleij            3
                                    Michael Opdenacker       1
                                    Rob Herring            111
                                    Russell King            44
                                    Stephen Boyd             3
Name: line, dtype: int64

In [26]:
import pygal
from pygal import Config

config = Config()
config.show_legend = False
config.human_readable = True
config.fill = True
config.margin = 0
config.width = 200
config.height = 200
config.width = 20
config.height = 20

for group in grouped_files.groupby(level=0):
    
    treemap = pygal.Treemap(config)
    filename = group[0].replace("/", "-").replace(".", "_").strip() + ".png"
    data = group[1].reset_index()
    
    for item in data.iterrows():
        entry = item[1]
        treemap.add(entry['author'], entry['line'])
  
    treemap.render_to_png('treemaps_mini/' + filename)

In [4]:
grouped = data.groupby('path').author.nunique()
grouped.head()


Out[4]:
path
drivers/scsi/bfa/bfad_im.c    15
Name: author, dtype: int64

In [ ]:
import pygal

treemap = pygal.Treemap()

for entry in grouped.iteritems():
    treemap.add(entry[0], entry[1])

treemap.render_to_file("test.svg")