In [1]:

    
%matplotlib inline

import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from jupyterthemes import jtplot
import re
import unidecode

jtplot.style()

def millions(x, pos):
    return '%1.f M€' % (x*1e-6)

def custom_parser(s):
    if type(s) == str:
        return pd.datetime.strptime(s, '%d/%m/%Y')
    return pd.np.nan

Analyse des avantages



In [2]:

    
avantage_data = pd.read_csv('../../data/tmp/declaration_avantage_2017_07_27_04_00.csv', 
                       parse_dates=['avant_date_signature'], 
                       date_parser=custom_parser,
                       sep=';')









    



/Users/fmassot/Envs/RC3/lib/python3.4/site-packages/IPython/core/interactiveshell.py:2718: DtypeWarning: Columns (12,13,14,18,19,25,26,27,28,29,34) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)



In [4]:

    
avantage_data.head()









    Out[4]:






  
    
      
      entreprise_identifiant
      denomination_sociale
      ligne_identifiant
      ligne_rectification
      benef_categorie_code
      categorie
      benef_nom
      benef_prenom
      benef_qualite_code
      qualite
      ...
      benef_etablissement_codepostal
      benef_etablissement_ville
      benef_denomination_sociale
      benef_objet_social
      ligne_type
      avant_date_signature
      avant_montant_ttc
      avant_nature
      avant_convention_lie
      semestre
    
  
  
    
      0
      LFSWZPFF
      AbbVie
      1H12015NR1792
      N
      [PRS]
      Professionnel de santé
      REY
      MARIE
      [10]
      Médecin
      ...
      NaN
      NaN
      NaN
      NaN
      [A]
      2015-02-16
      40
      REPAS
      NaN
      S1
    
    
      1
      LFSWZPFF
      AbbVie
      1H12015NR2265
      N
      [PRS]
      Professionnel de santé
      ORANCEON
      LIONEL
      [10]
      Médecin
      ...
      NaN
      NaN
      NaN
      NaN
      [A]
      2015-03-12
      35
      REPAS
      NaN
      S1
    
    
      2
      IOUJLTOH
      GLOBAL D
      GD-3712
      N
      [PRS]
      Professionnel de santé
      LECONTE
      CAROLE
      [40]
      Chirurgien-dentiste
      ...
      NaN
      NaN
      NaN
      NaN
      [A]
      2016-02-04
      94
      REPAS
      NaN
      S1
    
    
      3
      IOUJLTOH
      GLOBAL D
      GD-3628
      N
      [PRS]
      Professionnel de santé
      HENRY
      FRANCOIS
      [40]
      Chirurgien-dentiste
      ...
      NaN
      NaN
      NaN
      NaN
      [A]
      2016-01-11
      50
      REPAS
      NaN
      S1
    
    
      4
      IOUJLTOH
      GLOBAL D
      GD-3418
      N
      [PRS]
      Professionnel de santé
      JOUINES
      OLIVIER
      [40]
      Chirurgien-dentiste
      ...
      NaN
      NaN
      NaN
      NaN
      [A]
      2016-01-26
      20
      REPAS
      NaN
      S1
    
  

5 rows × 36 columns

Montant total des avantages données aux étudiants depuis 2012



In [5]:

    
print("Total des avantages depuis mi 2013 :", millions(avantage_data.avant_montant_ttc.sum(), ""))
print("Nombre d'avantages :", avantage_data.avant_montant_ttc.shape[0])
print("Montant moyen d'un avantage :", avantage_data.avant_montant_ttc.mean())









    



Total des avantages depuis mi 2013 : 911 M€
Nombre d'avantages : 6894196
Montant moyen d'un avantage : 132.182683956

Evolution annuelle du montant des avantages



In [7]:

    
avantage_data_by_month = avantage_data.groupby(avantage_data.avant_date_signature.dt.to_period("M"))
avantage_data_by_quarter = avantage_data.groupby(avantage_data.avant_date_signature.dt.to_period("Q"))
avantage_data_by_year = avantage_data.groupby(avantage_data.avant_date_signature.dt.to_period("A"))
avantage_data_by_year.avant_montant_ttc.sum()









    Out[7]:





avant_date_signature
2012     31882376
2013    150507607
2014    242825450
2015    258937971
2016    227139927
Freq: A-DEC, Name: avant_montant_ttc, dtype: int64



In [8]:

    
with sns.axes_style("whitegrid"):
    sns.set_context("talk")
    plot = avantage_data_by_year.avant_montant_ttc.sum().plot()
    formatter = mpl.ticker.FuncFormatter(millions)
    plot.yaxis.set_major_formatter(formatter)
    plot = avantage_data_by_quarter.avant_montant_ttc.sum().plot()
    formatter = mpl.ticker.FuncFormatter(millions)
    plot.yaxis.set_major_formatter(formatter)

Montant des avantages par type



In [9]:

    
repas_re = re.compile('(repas|restauration|restaurant|dejeuner|diner|collation|bouche|coktail|cocktail|traiteur|buffet|reception)')
transport_re = re.compile('(transport|deplacement|train)')
hebergement_re = re.compile('(hebergement|hospitalite|nuit|voyage|hotel|bergement)')
congre_re = re.compile("(inscription|congres|invitation|conferences)")
autre_re = re.compile("(autre|divers|cadeau)")
don_re = re.compile("don")

def normalize_nature(nature):
    nature = unidecode.unidecode(nature).lower()
    if repas_re.search(nature):
        return "REPAS"
    if hebergement_re.search(nature):
        return "HEBERGEMENT"
    if transport_re.search(nature):
        return "TRANSPORT"
    if congre_re.search(nature):
        return "CONGRES"
    if don_re.search(nature):
        return "DON"
    if autre_re.search(nature):
        return "AUTRE"
    
    return nature

avantage_data["normalized_avant_nature"] = avantage_data.avant_nature.apply(normalize_nature)
avant_by_nature = avantage_data.groupby('normalized_avant_nature').sum()



In [10]:

    
avant_by_nature.reset_index(level=0, inplace=True)
avant_by_nature.sort_values(by="avant_montant_ttc", ascending=False, inplace=True)



In [11]:

    
with sns.axes_style("whitegrid"):
    sns.set_context("talk")
    sns.barplot(x="avant_montant_ttc", y="normalized_avant_nature", data=avant_by_nature[:10], label="Total", color="b")



In [ ]:

	entreprise_identifiant	denomination_sociale	ligne_identifiant	ligne_rectification	benef_categorie_code	categorie	benef_nom	benef_prenom	benef_qualite_code	qualite	...	benef_etablissement_codepostal	benef_etablissement_ville	benef_denomination_sociale	benef_objet_social	ligne_type	avant_date_signature	avant_montant_ttc	avant_nature	avant_convention_lie	semestre
0	LFSWZPFF	AbbVie	1H12015NR1792	N	[PRS]	Professionnel de santé	REY	MARIE	[10]	Médecin	...	NaN	NaN	NaN	NaN	[A]	2015-02-16	40	REPAS	NaN	S1
1	LFSWZPFF	AbbVie	1H12015NR2265	N	[PRS]	Professionnel de santé	ORANCEON	LIONEL	[10]	Médecin	...	NaN	NaN	NaN	NaN	[A]	2015-03-12	35	REPAS	NaN	S1
2	IOUJLTOH	GLOBAL D	GD-3712	N	[PRS]	Professionnel de santé	LECONTE	CAROLE	[40]	Chirurgien-dentiste	...	NaN	NaN	NaN	NaN	[A]	2016-02-04	94	REPAS	NaN	S1
3	IOUJLTOH	GLOBAL D	GD-3628	N	[PRS]	Professionnel de santé	HENRY	FRANCOIS	[40]	Chirurgien-dentiste	...	NaN	NaN	NaN	NaN	[A]	2016-01-11	50	REPAS	NaN	S1
4	IOUJLTOH	GLOBAL D	GD-3418	N	[PRS]	Professionnel de santé	JOUINES	OLIVIER	[40]	Chirurgien-dentiste	...	NaN	NaN	NaN	NaN	[A]	2016-01-26	20	REPAS	NaN	S1