Process Mercasa export per country and total data


In [13]:
import matplotlib.pyplot as plt
import matplotlib
import os
import pandas as pd
import numpy as np
%matplotlib inline
matplotlib.style.use('ggplot')
import datetime
import locale
import time
locale.setlocale(locale.LC_TIME, 'es_ES.UTF-8')
import glob

In [14]:
product = 'acelga/'
pro = 'production/'
imp = 'export/'
pwd = '/Volumes/Macintosh HD/_GitHub/journey-of-food/data/producto/'
aux = '/Volumes/Macintosh HD/_GitHub/journey-of-food/data/aux/'
dwd = '/Volumes/Macintosh HD/_GitHub/journey-of-food/data/source/'

In [15]:
os.chdir(dwd)
files = os.listdir(dwd)
files


Out[15]:
['FH_EPRODMESK.xlsx',
 'FH_EPRODPAISK.xlsx',
 'FH_EPROVPRODK.xlsx',
 'FH_IPRODMESK.xlsx',
 'FH_IPRODPAIK.xlsx',
 'import_2015.csv',
 '~$FH_IPRODPAIK.xlsx']

In [16]:
data = pd.read_excel(files[1], sheetname='2015', encoding ='utf-8',index_col=0,na_values='x')
data.fillna(0,inplace=True)

In [17]:
data.head()


Out[17]:
ALEMANIA AUSTRIA BÉLGICA BULGARIA CHIPRE CROACIA DINAMARCA ESLOVAQUIA ESLOVENIA ESTONIA ... PAISES BAJOS POLONIA PORTUGAL REINO UNIDO REP. CHECA RUMANIA SUECIA TOTAL UE-27 EXTRE UE TOTAL
ACELGA 134.0 26.0 356.0 0.0 0.0 0.0 19.0 0.0 0.0 0.0 ... 192.0 2.0 18.0 341.0 0.0 0.0 24.0 2717.0 81.0 2798.0
AJO 13205.0 297.0 927.0 1792.0 0.0 117.0 784.0 451.0 79.0 10.0 ... 2977.0 5485.0 12621.0 12944.0 2836.0 4136.0 641.0 100292.0 48913.0 149205.0
ALCACHOFA 942.0 0.0 429.0 15.0 0.0 9.0 23.0 0.0 11.0 0.0 ... 518.0 0.0 53.0 129.0 8.0 0.0 110.0 14390.0 88.0 14478.0
APIO 11854.0 325.0 3743.0 37.0 4.0 18.0 1309.0 110.0 2.0 121.0 ... 15360.0 3617.0 343.0 29652.0 1352.0 119.0 1148.0 85388.0 2430.0 87818.0
BERENJENA 33216.0 1109.0 2399.0 7285.0 460.0 276.0 1514.0 358.0 151.0 115.0 ... 14371.0 2566.0 2105.0 10153.0 2088.0 1657.0 1161.0 157106.0 2095.0 159201.0

5 rows × 30 columns


In [18]:
data.drop('%cuota',axis=0,inplace=True)
data.drop('TOTAL HORTALIZAS',axis=0,inplace=True)
data.drop('OTRAS HORTALIZAS',axis=0,inplace=True)
data.drop('OTROS CÍTRICOS',axis=0,inplace=True)
data.drop('OTRAS FRUTAS',axis=0,inplace=True)
data.drop('TOTAL FRUTAS',axis=0,inplace=True)
data.drop('TOTAL F. Y H.',axis=0,inplace=True)
data.drop('TOTAL',axis=1,inplace=True)
data.drop('TOTAL UE-27',axis=1,inplace=True)
data.drop('T. HORTALIZAS',axis=0,inplace=True)
data.drop('* Datos provisionales',axis=0,inplace=True)

In [19]:
data = data.loc[pd.notnull(data.index)]

In [20]:
data.index = [x.lower() for x in data.index.values.tolist()]

In [21]:
data.loc[data.index=='acelga'].transpose().sort_values('acelga',ascending=False).sum()


Out[21]:
acelga    2798.0
dtype: float64

In [22]:
for producto in data.index.values:
    datos = data.loc[data.index==producto].transpose().sort_values(producto,ascending=False)
    datos.columns = ['Exportado']
    datosSum = pd.DataFrame(datos.sum())
    datosSum.columns = ['Ton']
    datos['Percent'] = datos['Exportado']/datos['Exportado'].sum()*100.0
    datos.Percent = datos.Percent.round(2)
    datos.index.name = 'Pais'
    datosSum.index.name = 'Total'
    try:
        datos[:3].to_csv(pwd+producto+'/'+producto+'_export_country.csv')
        datosSum.to_csv(pwd+producto+'/'+producto+'_export_total.csv')
    except:
        datos[:3].to_csv(pwd+producto+'_export_country.csv')
        datosSum.to_csv(pwd+producto+'_export_total.csv')

In [23]:
data.head()


Out[23]:
ALEMANIA AUSTRIA BÉLGICA BULGARIA CHIPRE CROACIA DINAMARCA ESLOVAQUIA ESLOVENIA ESTONIA ... LUXEMBURGO MALTA PAISES BAJOS POLONIA PORTUGAL REINO UNIDO REP. CHECA RUMANIA SUECIA EXTRE UE
acelga 134.0 26.0 356.0 0.0 0.0 0.0 19.0 0.0 0.0 0.0 ... 0.0 0.0 192.0 2.0 18.0 341.0 0.0 0.0 24.0 81.0
ajo 13205.0 297.0 927.0 1792.0 0.0 117.0 784.0 451.0 79.0 10.0 ... 10.0 104.0 2977.0 5485.0 12621.0 12944.0 2836.0 4136.0 641.0 48913.0
alcachofa 942.0 0.0 429.0 15.0 0.0 9.0 23.0 0.0 11.0 0.0 ... 1.0 5.0 518.0 0.0 53.0 129.0 8.0 0.0 110.0 88.0
apio 11854.0 325.0 3743.0 37.0 4.0 18.0 1309.0 110.0 2.0 121.0 ... 0.0 10.0 15360.0 3617.0 343.0 29652.0 1352.0 119.0 1148.0 2430.0
berenjena 33216.0 1109.0 2399.0 7285.0 460.0 276.0 1514.0 358.0 151.0 115.0 ... 42.0 5.0 14371.0 2566.0 2105.0 10153.0 2088.0 1657.0 1161.0 2095.0

5 rows × 28 columns


In [24]:
names = pd.read_csv(aux+'map_names_import.csv',index_col=1)

In [390]:


In [ ]: