Open the Mercasa files and get total and per country list
In [16]:
import matplotlib.pyplot as plt
import matplotlib
import os
import pandas as pd
import numpy as np
%matplotlib inline
matplotlib.style.use('ggplot')
import datetime
import locale
import time
locale.setlocale(locale.LC_TIME, 'es_ES.UTF-8')
import glob
In [17]:
product = 'acelga/'
pro = 'production/'
imp = 'import/'
pwd = '/Volumes/Macintosh HD/_GitHub/journey-of-food/data/producto/'
aux = '/Volumes/Macintosh HD/_GitHub/journey-of-food/data/aux/'
dwd = '/Volumes/Macintosh HD/_GitHub/journey-of-food/data/source/'
In [18]:
os.chdir(dwd)
files = os.listdir(dwd)
files
Out[18]:
In [19]:
data = pd.read_excel('FH_IPRODPAIK.xlsx', sheetname='2015', encoding ='utf-8',skiprows=[2],index_col=0,na_values='x')
data.fillna(0,inplace=True)
In [20]:
data.head()
Out[20]:
In [21]:
data.drop('%cuota',axis=0,inplace=True)
data.drop('TOTAL HORTALIZAS',axis=0,inplace=True)
data.drop('OTRAS HORTALIZAS',axis=0,inplace=True)
data.drop('OTROS CÍTRICOS',axis=0,inplace=True)
data.drop('OTRAS FRUTAS',axis=0,inplace=True)
data.drop('TOTAL FRUTAS',axis=0,inplace=True)
data.drop('TOTAL F. Y H.',axis=0,inplace=True)
data.drop('TOTAL',axis=1,inplace=True)
data.drop('TOTAL UE-27',axis=1,inplace=True)
data.drop('T. HORTALIZAS',axis=0,inplace=True)
data.drop('* Datos provisionales',axis=0,inplace=True)
In [22]:
data = data.loc[pd.notnull(data.index)]
In [23]:
data.index = [x.lower() for x in data.index.values.tolist()]
In [24]:
data.loc[data.index=='acelga'].transpose().sort_values('acelga',ascending=False).sum()
Out[24]:
In [25]:
for producto in data.index.values:
datos = data.loc[data.index==producto].transpose().sort_values(producto,ascending=False)
datos.columns = ['Importado']
datosSum = pd.DataFrame(datos.sum())
datosSum.columns = ['Ton']
datos['Percent'] = datos['Importado']/datos['Importado'].sum()*100.0
datos.Percent = datos.Percent.round(2)
datos.index.name = 'Pais'
datosSum.index.name = 'Total'
try:
datos[:3].to_csv(pwd+producto+'/'+producto+'_import_country.csv')
datosSum.to_csv(pwd+producto+'/'+producto+'_import_total.csv')
except:
datos[:3].to_csv(pwd+producto+'_import_country.csv')
datosSum.to_csv(pwd+producto+'_import_total.csv')
In [26]:
data
Out[26]:
In [27]:
names = pd.read_csv(aux+'map_names_import.csv',index_col=1)
In [390]:
In [ ]: