Hello

  • Analyse

In [1]:
import pandas as pd

# Load the semicolon-separated CSV export into a DataFrame.
df = pd.read_csv(
    "../dataset/git_demo_timestamp_linux.csv",
    sep=";",
)
# Ten most frequent values of the `author` column, most frequent first.
df["author"].value_counts().head(10)


Out[1]:
Linus Torvalds           24259
David S. Miller           9563
Mark Brown                6917
Takashi Iwai              6293
Al Viro                   6064
H Hartley Sweeten         5942
Ingo Molnar               5462
Mauro Carvalho Chehab     5384
Arnd Bergmann             5305
Greg Kroah-Hartman        4687
Name: author, dtype: int64

In [2]:
%matplotlib inline
df.author.value_counts().head(10).plot(kind='pie')


Out[2]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a730dd3a90>

In [3]:
# Convert the `timestamp_local` column to datetime values, replacing the
# column on `df` so that later cells can use the `.dt` accessor.
df["timestamp_local"] = df["timestamp_local"].pipe(pd.to_datetime)
# Summary statistics of the frame after the conversion.
df.describe()


Out[3]:
timestamp_local author
count 723214 723213
unique 691746 17877
top 2017-11-01 03:56:19 Linus Torvalds
freq 137 24259
first 2005-04-16 15:20:36 NaN
last 2017-12-31 16:52:15 NaN

In [4]:
# Bar chart of the number of rows per calendar year of `timestamp_local`
# (the column must already be datetime-typed for `.dt` to work).
# `value_counts(sort=False)` only skips the sort by frequency; it does not
# order the result by year, so sort the index explicitly to get a
# chronological x-axis regardless of the row order in the data.
df.timestamp_local.dt.year.value_counts(sort=False).sort_index().plot(kind='bar')


Out[4]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a730e93a90>

In [5]:
# Bar chart of the number of rows per hour of day of `timestamp_local`
# (the column must already be datetime-typed for `.dt` to work).
# `value_counts(sort=False)` only skips the sort by frequency; it does not
# order the result by hour, so sort the index explicitly so the bars run in
# ascending hour order regardless of the row order in the data.
df.timestamp_local.dt.hour.value_counts(sort=False).sort_index().plot(kind='bar')


Out[5]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a7331db2b0>