In [1]:
import collections

from matplotlib import pyplot as plt
%matplotlib inline
import pandas as pd
# Show up to 500 columns when rendering DataFrames so wide frames are not elided.
pd.options.display.max_columns = 500

In [2]:
# Path (relative to the notebook's working directory) of the CSV sample
# that the following cells load.
filename = "LBB_XDPDPA_DPAE_20160307_20170407_20170407_165803_sample.csv"

In [10]:
# Load the pipe-delimited, UTF-8 encoded sample into a DataFrame.
# na_filter=False disables missing-value detection, so empty fields stay
# as empty strings instead of being converted to NaN.
df = pd.read_csv(
    filename,
    sep='|',
    encoding='utf-8',
    na_filter=False,
)

In [ ]:
# Preview only the first rows: with display.max_columns raised to 500,
# rendering the whole frame produces a very large output cell.
df.head()

In [12]:
# Subset of columns displayed together when inspecting the data.
column_names = [
    "dc_naf_id",
    "dn_tailleetablissement",
    "kd_dateembauche",
    "dc_typecontrat_id",
    "dd_datefincdd",
    "dc_romev3_1_id",
    "dc_romev3_2_id",
    "nbrjourtravaille",
    "kn_trancheage",
]

In [ ]:
# Show every 1000th row, restricted to the columns listed in column_names.
df.iloc[::1000][column_names]

In [43]:
# Bar chart of the number of non-null kd_datecreation values per (year, month).
# The dtype carries an explicit unit: pandas >= 2.0 raises TypeError when
# casting to the unit-less 'datetime64' dtype.
date_series = df.kd_datecreation.astype('datetime64[ns]')
date_series.groupby([date_series.dt.year, date_series.dt.month]).count().plot(
    kind="bar",
    title="kd_datecreation: row count per (year, month)",
)


Out[43]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f99ef7c8438>

In [44]:
# Bar chart of the number of non-null kd_dateembauche values per (year, month).
# The dtype carries an explicit unit: pandas >= 2.0 raises TypeError when
# casting to the unit-less 'datetime64' dtype.
date_series = df.kd_dateembauche.astype('datetime64[ns]')
date_series.groupby([date_series.dt.year, date_series.dt.month]).count().plot(
    kind="bar",
    title="kd_dateembauche: row count per (year, month)",
)


Out[44]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f99ef79e8d0>

In [47]:
# Bar chart of the number of kd_datecreation values per day of month,
# restricted to March 2016.
# The dtype carries an explicit unit: pandas >= 2.0 raises TypeError when
# casting to the unit-less 'datetime64' dtype.
date_series = df.kd_datecreation.astype('datetime64[ns]')
tmp1 = date_series[date_series.dt.year == 2016]  # keep year 2016 only
tmp2 = tmp1[tmp1.dt.month == 3]  # then keep March only
tmp2.groupby(tmp2.dt.day).count().plot(
    kind="bar",
    title="kd_datecreation: row count per day, 2016-03",
)


Out[47]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f99ef794080>

In [53]:
# Bar chart of the number of kd_dateembauche values per day of month,
# restricted to December 2016.
# The dtype carries an explicit unit: pandas >= 2.0 raises TypeError when
# casting to the unit-less 'datetime64' dtype.
date_series = df.kd_dateembauche.astype('datetime64[ns]')
tmp1 = date_series[date_series.dt.year == 2016]  # keep year 2016 only
tmp2 = tmp1[tmp1.dt.month == 12]  # then keep December only
tmp2.groupby(tmp2.dt.day).count().plot(
    kind="bar",
    title="kd_dateembauche: row count per day, 2016-12",
)


Out[53]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f99ef4be160>