In [1]:
# Ces parties représentent le code utilisé pour réaliser l'exploitation ci-dessous
import sys
import pandas as pd
import numpy as np
import json
In [68]:
# Load the BAYER declarations and keep only rows whose date starts with "20YY-".
labo = pd.read_csv("BAYER.anon.csv", encoding='utf-8')
# A single NA-safe mask replaces the three successive filters: missing dates map
# to False through na=False, and the 'None' placeholder fails the regex anyway.
labo = labo[labo.DECL_AVANT_DATE.str.contains(r'^20\d\d-', na=False)]
avantages = labo[['DECL_AVANT_DATE','DECL_AVANT_MONTANT', 'BENEF_PS_QUALIFICATION', 'ORIGIN']]
# Index by the parsed declaration date so later cells can resample over time.
avantages.index = pd.to_datetime(avantages['DECL_AVANT_DATE'])
In [69]:
# Count of non-null DECL_AVANT_MONTANT values per business-quarter ('BQ') bin.
# resample() no longer accepts `how=`; aggregate on the returned Resampler instead.
avantages['DECL_AVANT_MONTANT'].resample('BQ').count().plot()
Out[69]:
In [78]:
# Sum of DECL_AVANT_MONTANT per business-quarter ('BQ') bin.
# resample() no longer accepts `how=`; aggregate on the returned Resampler instead.
avantages['DECL_AVANT_MONTANT'].resample('BQ').sum().plot()
Out[78]:
En période de crise, il est logique que des contacts soient établis (et que de petits cadeaux soient faits, comme des repas organisés) plutôt que des cadeaux à plusieurs milliers d'euros comme en 2012.
In [101]:
# Rows dated strictly between 2013-01-01 and 2013-07-01, counted per
# (ORIGIN, BENEF_PS_QUALIFICATION) pair, largest groups first.
# DataFrame.sort() no longer exists, and counting the grouping column itself
# leaves 'ORIGIN' as both an index level and a column, which sort_values()
# rejects as ambiguous. size() gives the same numbers (groupby drops rows whose
# key is missing, so every grouped row has a non-null ORIGIN).
(
    avantages[(avantages.index > '2013-01-01') & (avantages.index < '2013-07-01')]
    .groupby(['ORIGIN', 'BENEF_PS_QUALIFICATION'])
    .size()
    .sort_values(ascending=False)
    .to_frame('count')
)
Out[101]:
In [102]:
# Per-business-quarter count of DECL_AVANT_MONTANT values for 'Gynécologie' rows only.
# resample() no longer accepts `how=`; aggregate on the returned Resampler instead.
avantages[avantages.BENEF_PS_QUALIFICATION == u'Gynécologie']['DECL_AVANT_MONTANT'].resample('BQ').count().plot()
Out[102]:
In [74]:
# Per-business-quarter count of DECL_AVANT_MONTANT values for 'Médecine générale' rows only.
# resample() no longer accepts `how=`; aggregate on the returned Resampler instead.
avantages[avantages.BENEF_PS_QUALIFICATION == u'Médecine générale']['DECL_AVANT_MONTANT'].resample('BQ').count().plot()
Out[74]:
In [119]:
# Load the EFFIK declarations and keep only rows whose date starts with "20YY-".
labo = pd.read_csv("EFFIK.anon.csv", encoding='utf-8')
# A single NA-safe mask replaces the three successive filters: missing dates map
# to False through na=False, and the 'None' placeholder fails the regex anyway.
labo = labo[labo.DECL_AVANT_DATE.str.contains(r'^20\d\d-', na=False)]
avantages = labo[['DECL_AVANT_DATE','DECL_AVANT_MONTANT', 'BENEF_PS_QUALIFICATION', 'ORIGIN']]
# Index by the parsed declaration date so later cells can resample over time.
avantages.index = pd.to_datetime(avantages['DECL_AVANT_DATE'])
In [120]:
# Count of non-null DECL_AVANT_MONTANT values per business-quarter ('BQ') bin.
# resample() no longer accepts `how=`; aggregate on the returned Resampler instead.
avantages['DECL_AVANT_MONTANT'].resample('BQ').count().plot()
Out[120]:
In [136]:
# For 'Gynécologie' and 'Médecine générale' rows, count entries per
# (ORIGIN, BENEF_PS_QUALIFICATION) group and per business-quarter ('BQ') bin.
# resample() no longer accepts `how=`; the aggregation is called on the
# Resampler instead. isin() expresses the two-value OR filter in one test.
(
    avantages[avantages['BENEF_PS_QUALIFICATION'].isin([u'Gynécologie', u'Médecine générale'])]
    .groupby(['ORIGIN', 'BENEF_PS_QUALIFICATION'])['ORIGIN']
    .resample('BQ')
    .count()
)
Out[136]:
Les dates correspondent à des fins de trimestre (fréquence « BQ » : dernier jour ouvré de chaque trimestre).
In [2]:
# Load the BIOGARAN declarations and keep only rows whose date starts with "20YY-".
labo = pd.read_csv("BIOGARAN.anon.csv", encoding='utf-8')
# A single NA-safe mask replaces the three successive filters: missing dates map
# to False through na=False, and the 'None' placeholder fails the regex anyway.
labo = labo[labo.DECL_AVANT_DATE.str.contains(r'^20\d\d-', na=False)]
avantages = labo[['DECL_AVANT_DATE','DECL_AVANT_MONTANT', 'BENEF_PS_QUALIFICATION', 'ORIGIN']]
# Index by the parsed declaration date so later cells can resample over time.
avantages.index = pd.to_datetime(avantages['DECL_AVANT_DATE'])
In [8]:
# Count of non-null DECL_AVANT_MONTANT values per business-quarter ('BQ') bin.
# resample() no longer accepts `how=`; aggregate on the returned Resampler instead.
avantages['DECL_AVANT_MONTANT'].resample('BQ').count().plot()
Out[8]:
In [10]:
# Non-null DECL_AVANT_MONTANT count for every (ORIGIN, BENEF_PS_QUALIFICATION) pair.
(
    avantages[['ORIGIN', 'BENEF_PS_QUALIFICATION', 'DECL_AVANT_MONTANT']]
    .groupby(['ORIGIN', 'BENEF_PS_QUALIFICATION'])
    .count()
)
Out[10]:
Sans doute dû à son statut de génériqueur.
In [2]:
# Load the gynaecologist dataset, keeping only 'Gynécologie' beneficiaries,
# 'AVANTAGE' declarations, and rows whose date starts with "20YY-".
gynecos = pd.read_csv("gynecos.anon.csv", encoding='utf-8')
gynecos = gynecos[
    (gynecos.BENEF_PS_QUALIFICATION == u'Gynécologie')
    & (gynecos.DECL_TYPE == 'AVANTAGE')
    # na=False: without it a missing date yields NaN in the mask, and a mask
    # containing NaN cannot be used to index the frame.
    & gynecos.DECL_AVANT_DATE.str.contains(r'^20\d\d-', na=False)
]
# Index by the parsed declaration date so later cells can resample over time.
gynecos.index = pd.to_datetime(gynecos['DECL_AVANT_DATE'])
In [3]:
# Number of retained rows that have a non-null LABO value.
gynecos['LABO'].count()
Out[3]:
In [4]:
# Count of non-null DECL_AVANT_MONTANT values per business-quarter ('BQ') bin.
# resample() no longer accepts `how=`; aggregate on the returned Resampler instead.
gynecos['DECL_AVANT_MONTANT'].resample('BQ').count().plot()
Out[4]:
In [5]:
# Per-laboratory summary: total amount, number of amounts, mean amount, and each
# laboratory's share (in %) of the overall count and of the overall amount.
# DataFrame.sort() no longer exists (sort_values replaces it). Both aggregates
# now come from one groupby, so 'count' is a plain column rather than a
# one-column DataFrame assigned into a column.
gynecosParLabos = (
    gynecos.groupby('LABO')['DECL_AVANT_MONTANT']
    .agg(['sum', 'count'])
    .rename(columns={'sum': 'DECL_AVANT_MONTANT'})
    .sort_values('DECL_AVANT_MONTANT', ascending=False)
)
gynecosParLabos['moyenne'] = gynecosParLabos['DECL_AVANT_MONTANT'] / gynecosParLabos['count']
# Column labels (including the 'porportion' spelling) are kept as they were.
gynecosParLabos['porportion_count'] = gynecosParLabos['count'] * 100 / gynecos.LABO.count()
gynecosParLabos['porportion_montant'] = gynecosParLabos['DECL_AVANT_MONTANT'] * 100 / gynecos.DECL_AVANT_MONTANT.sum()
gynecosParLabos
Out[5]:
30 % des cadeaux aux gynécos viennent d'un seul laboratoire : Theramex SAM.
In [6]:
# Theramex SAM only: count of non-null DECL_AVANT_MONTANT values per business-quarter bin.
# resample() no longer accepts `how=`; aggregate on the returned Resampler instead.
gynecos[gynecos.LABO == 'Theramex SAM']['DECL_AVANT_MONTANT'].resample('BQ').count().plot()
Out[6]:
In [7]:
# Every laboratory except Theramex SAM: count of non-null DECL_AVANT_MONTANT
# values per business-quarter bin.
# resample() no longer accepts `how=`; aggregate on the returned Resampler instead.
gynecos[gynecos.LABO != 'Theramex SAM']['DECL_AVANT_MONTANT'].resample('BQ').count().plot()
Out[7]:
In [8]:
# Every laboratory except Theramex SAM: sum of DECL_AVANT_MONTANT per
# business-quarter bin.
# resample() no longer accepts `how=`; aggregate on the returned Resampler instead.
gynecos[gynecos.LABO != 'Theramex SAM']['DECL_AVANT_MONTANT'].resample('BQ').sum().plot()
Out[8]:
In [29]:
# Per-laboratory count of non-null DECL_AVANT_MONTANT values in each
# business-quarter ('BQ') bin, then only the (LABO, bin) rows above 100.
# resample() no longer accepts `how=`; the count is taken on the Resampler and
# turned back into a one-column frame so the attribute filter below still works.
# The name `semestres` is historical: 'BQ' bins are quarters.
semestres = (
    gynecos.groupby('LABO')['DECL_AVANT_MONTANT']
    .resample('BQ')
    .count()
    .to_frame()
)
semestres[semestres.DECL_AVANT_MONTANT > 100]
Out[29]:
In [ ]: