In [1]:
# Import pandas in the first executed cell so the notebook also runs on a
# fresh kernel that has no startup profile injecting `pd`.
import pandas as pd

# Load the raw gzip-compressed, comma-separated results file.
# CodeInsee is an identifier, not a number: force it to str so codes such as
# '01001' keep their leading zeros regardless of type inference.
df_in = pd.read_csv(
    'download/euro2014.csv.gz',
    compression='gzip',
    delimiter=',',
    low_memory=False,  # parse each column in one pass (no chunked dtype guessing)
    dtype={'CodeInsee': str},
)

In [2]:
# Columns retained for the export, in output order.
# NOTE(review): the grouping below is only a reading aid -- the first chunk is
# the identifier, the second looks like turnout/ballot counters and the third
# like per-list vote counts; confirm against the data dictionary.
keep = (
    ['CodeInsee']
    + ['Inscrits', 'Abstentions', 'Votants', 'Blancs', 'Nuls', 'Exprimés']
    + ['LEXG', 'LUG', 'LFG', 'LDVG', 'LVEC', 'LDIV']
    + ['LUC', 'LUMP', 'LDVD', 'LFN', 'LEXD']
    + ['NbCand']
)

# Project the raw frame onto the retained columns only.
# (An earlier, disabled variant built an ASCII-encoded 'dc' column from
# CodeInsee and dropped the original column at this point.)
df_out = df_in[keep]

In [3]:
# Move CodeInsee into the index and publish it under the short name 'dc'.
# The result is assigned rather than mutated in place, and the guard makes the
# cell safe to re-run: once CodeInsee is already the index there is nothing
# left to move, so a second execution no longer raises KeyError.
if 'CodeInsee' in df_out.columns:
    df_out = df_out.set_index('CodeInsee')
df_out.index.name = 'dc'

# Sanity check: list index labels that occur more than once (expected: []).
# Index.get_duplicates() has been removed from pandas; Index.duplicated()
# marks every repeat occurrence, from which the distinct labels are taken.
sorted(df_out.index[df_out.index.duplicated()].unique())


Out[3]:
[]

In [4]:
# Display the index labels to check that the codes are still strings
# (leading zeros intact) after the move from the CodeInsee column.
df_out.index


Out[4]:
Index([u'01001', u'01002', u'01004', u'01005', u'01006', u'01007', u'01008', u'01009', u'01010', u'01011', u'01012', u'01013', u'01014', u'01015', u'01016', u'01017', u'01019', u'01021', u'01022', u'01023', u'01024', u'01025', u'01026', u'01027', u'01028', u'01029', u'01030', u'01031', u'01032', u'01033', u'01034', u'01035', u'01036', u'01037', u'01038', u'01039', u'01040', u'01041', u'01042', u'01043', u'01044', u'01045', u'01046', u'01047', u'01049', u'01050', u'01051', u'01052', u'01053', u'01054', u'01056', u'01057', u'01058', u'01059', u'01060', u'01061', u'01062', u'01063', u'01064', u'01065', u'01066', u'01067', u'01068', u'01069', u'01071', u'01072', u'01073', u'01074', u'01075', u'01076', u'01077', u'01078', u'01079', u'01080', u'01081', u'01082', u'01083', u'01084', u'01085', u'01087', u'01088', u'01089', u'01090', u'01091', u'01092', u'01093', u'01094', u'01095', u'01096', u'01097', u'01098', u'01099', u'01100', u'01101', u'01102', u'01103', u'01104', u'01105', u'01106', u'01107', ...], dtype='object')

In [5]:
# Display the remaining column labels; CodeInsee should be absent now that it
# serves as the index.
df_out.columns


Out[5]:
Index([u'Inscrits', u'Abstentions', u'Votants', u'Blancs', u'Nuls', u'Exprimés', u'LEXG', u'LUG', u'LFG', u'LDVG', u'LVEC', u'LDIV', u'LUC', u'LUMP', u'LDVD', u'LFN', u'LEXD', u'NbCand'], dtype='object')

In [6]:
# Preview the last rows of the frame (tail() defaults to five rows) as a quick
# check of the end of the table.
df_out.tail()


Out[6]:
Inscrits Abstentions Votants Blancs Nuls Exprimés LEXG LUG LFG LDVG LVEC LDIV LUC LUMP LDVD LFN LEXD NbCand
dc
ZS501 496 393 103 7 0 96 4 21 0 5 0 0 13 38 0 15 0 19
ZS502 4449 3701 748 48 22 678 33 210 0 23 0 0 94 190 0 105 0 19
ZW001 9333 4774 4559 33 30 4496 53 1282 0 1077 0 0 308 1568 0 91 0 19
ZX701 4874 4147 727 22 4 701 3 28 0 11 27 0 53 305 0 222 0 19
ZX801 18389 16429 1960 158 107 1695 82 257 0 104 0 0 93 640 0 287 0 19

In [7]:
# Export the table as tab-separated values.
# DataFrame.to_csv takes the field separator as `sep`; `delimiter` is not one
# of its parameters, so the previous call either failed with TypeError or (on
# legacy versions that swallowed extra keywords) wrote commas into a .tsv file.
df_out.to_csv('data/euro2014.tsv', sep='\t')

In [8]:
#st = pd.HDFStore('data/data.h5')
#del st['euro2014']
#st['euro2014'] = df_out
# st.append('euro2014',df_out)
#st.close()

In [9]:
# Store the table in the shared HDF5 file under the key 'euro2014'.
# `key` is passed by name because recent pandas releases deprecate positional
# arguments after the path. format='f' is the short alias for the 'fixed'
# format.
df_out.to_hdf('data/data.h5', key='euro2014', format='f')