In [1]:
import collections
from matplotlib import pyplot as plt
%matplotlib inline
import pandas as pd
# Raise pandas' column display limit to 500 so wide frames are rendered
# without eliding columns.
pd.set_option('display.max_columns', 500)
In [2]:
# Name of the CSV file that is loaded into `df` by the read_csv cell below.
# It has no directory component, so it is resolved against the current
# working directory.
filename = 'LBB_XDPDPA_DPAE_20160307_20170407_20170407_165803_sample.csv'
In [10]:
# Load the pipe-delimited, UTF-8 encoded file into a DataFrame.
# na_filter=False switches off pandas' missing-value detection, so empty
# fields are kept as-is instead of being converted to NaN.
df = pd.read_csv(
    filename,
    sep='|',
    encoding='utf-8',
    na_filter=False,
)
In [ ]:
# Preview only the first rows: rendering the whole frame as a cell output
# bloats the notebook and hides the narrative.
df.head()
In [12]:
# Subset of columns to display when sampling rows from `df`.
column_names = [
    'dc_naf_id',
    'dn_tailleetablissement',
    'kd_dateembauche',
    'dc_typecontrat_id',
    'dd_datefincdd',
    'dc_romev3_1_id',
    'dc_romev3_2_id',
    'nbrjourtravaille',
    'kn_trancheage',
]
In [ ]:
# Show every 1000th row (by position), restricted to the selected columns.
df.iloc[::1000][column_names]
In [43]:
# Parse the kd_datecreation column into datetimes. pd.to_datetime is used
# instead of .astype('datetime64') because casting to the unit-less
# 'datetime64' dtype raises TypeError on pandas >= 2.0.
date_series = pd.to_datetime(df.kd_datecreation)

# Count the non-null dates in each (year, month) group and draw one bar per
# group. The group keys are renamed so the axis label reads "year,month"
# rather than repeating the column name twice.
date_series.groupby(
    [date_series.dt.year.rename("year"), date_series.dt.month.rename("month")]
).count().plot(kind="bar", title="Rows per (year, month) of kd_datecreation")
Out[43]:
In [44]:
# Parse the kd_dateembauche column into datetimes. pd.to_datetime is used
# instead of .astype('datetime64') because casting to the unit-less
# 'datetime64' dtype raises TypeError on pandas >= 2.0.
date_series = pd.to_datetime(df.kd_dateembauche)

# Count the non-null dates in each (year, month) group and draw one bar per
# group. The group keys are renamed so the axis label reads "year,month"
# rather than repeating the column name twice.
date_series.groupby(
    [date_series.dt.year.rename("year"), date_series.dt.month.rename("month")]
).count().plot(kind="bar", title="Rows per (year, month) of kd_dateembauche")
Out[44]:
In [47]:
# Parse the kd_datecreation column into datetimes. pd.to_datetime is used
# instead of .astype('datetime64') because casting to the unit-less
# 'datetime64' dtype raises TypeError on pandas >= 2.0.
date_series = pd.to_datetime(df.kd_datecreation)

# Narrow down to March 2016 in two steps: first the year, then the month.
# The intermediate names tmp1/tmp2 are kept unchanged in case other cells
# read them.
tmp1 = date_series[date_series.dt.year == 2016]
tmp2 = tmp1[tmp1.dt.month == 3]

# Count the dates falling on each day of that month, one bar per day.
tmp2.groupby(tmp2.dt.day.rename("day")).count().plot(
    kind="bar", title="Rows per day of kd_datecreation, 2016-03"
)
Out[47]:
In [53]:
# Parse the kd_dateembauche column into datetimes. pd.to_datetime is used
# instead of .astype('datetime64') because casting to the unit-less
# 'datetime64' dtype raises TypeError on pandas >= 2.0.
date_series = pd.to_datetime(df.kd_dateembauche)

# Narrow down to December 2016 in two steps: first the year, then the month.
# The intermediate names tmp1/tmp2 are kept unchanged in case other cells
# read them.
tmp1 = date_series[date_series.dt.year == 2016]
tmp2 = tmp1[tmp1.dt.month == 12]

# Count the dates falling on each day of that month, one bar per day.
tmp2.groupby(tmp2.dt.day.rename("day")).count().plot(
    kind="bar", title="Rows per day of kd_dateembauche, 2016-12"
)
Out[53]: