Script to open and plot data from the Datacomex platform. Using tomate and import data as sample.
In [1]:
import matplotlib.pyplot as plt
import matplotlib
import os
import pandas as pd
import numpy as np
%matplotlib inline
matplotlib.style.use('ggplot')
In [2]:
product = 'tomate'
tipo = 'import'
In [5]:
pwd = '/Volumes/MacintoshHD/_GitHub/journey-of-food/data/products/'
os.chdir(pwd+product+'/'+tipo)
files = os.listdir()
files
Out[5]:
In [6]:
#dataList = []
#for file in files:
# data = pd.read_csv(file, delimiter=',', decimal=',', parse_dates=['subfila'])
# dataList.append(data)
data = pd.read_csv('DataComex_170928165640.csv', delimiter=',', decimal=',', parse_dates=['subfila'])
In [7]:
#data = pd.concat(dataList)
data = data.fillna(0)
data['tipo'] = tipo
In [8]:
data.sort_values('valor',ascending=False).head(4)
Out[8]:
In [468]:
#dataTotalSubfila = data.loc[data.subfila=='Total seleccionado']
dataTotalColumna = data.loc[data.columna=='Total Mundo']
dataTotalSubcolumna = data.loc[data.subcolumna=='Total modo transporte']
#data = data[data.subfila!='Total seleccionado']
data = data[data.columna!='Total Mundo']
data = data[data.subcolumna!='Total modo transporte']
In [469]:
countryList=[]
for country in data.columna.values.tolist():
countryList.append(country.replace(' (d.01/01/86)', ''))
data.columna = countryList
countryList=[]
for country in data.columna.values.tolist():
countryList.append(country.replace(' (d.01/01/99)', ''))
data.columna = countryList
countryList=[]
for country in data.columna.values.tolist():
countryList.append(country.replace(' (d.01/01/99)', ''))
data.columna = countryList
countryList=[]
for country in data.columna.values.tolist():
countryList.append(country.replace(' (d.01/01/95)', ''))
data.columna = countryList
countryList=[]
for country in data.columna.values.tolist():
countryList.append(country.replace(' (d.01/01/92)', ''))
data.columna = countryList
countryList=[]
for country in data.columna.values.tolist():
countryList.append(country.replace(' (d.01/06/05)', ''))
data.columna = countryList
countryList=[]
for country in data.columna.values.tolist():
countryList.append(country.replace(' (d.01/01/93)', ''))
data.columna = countryList
countryList=[]
for country in data.columna.values.tolist():
countryList.append(country.replace(' (d.01/01/90)', ''))
data.columna = countryList
countryList=[]
for country in data.columna.values.tolist():
countryList.append(country.replace(' (h.31/12/98)', ''))
data.columna = countryList
countryList=[]
for country in data.columna.values.tolist():
countryList.append(country.replace('(d.01/01/86)', ''))
data.columna = countryList
countryList=[]
for country in data.columna.values.tolist():
countryList.append(country.replace('(h.31/12/96)', ''))
data.columna = countryList
data.columna.values.tolist()
Out[469]:
In [470]:
data.columna = data.columna.astype('category')
data.subcolumna = data.subcolumna.astype('category')
In [471]:
data['Percent'] = data.valor/data.valor.sum()
In [472]:
data.sort_values('valor',ascending=False).head(6)
Out[472]:
In [463]:
data.columna = data.columna.apply(lambda x: x.__str__()[3:])
data.subcolumna = data.subcolumna.apply(lambda x: x.__str__()[3:])
In [451]:
import locale
import time
locale.setlocale(locale.LC_TIME, 'es_ES.UTF-8')
Out[451]:
In [452]:
data.subfila
data.subfila = data.subfila.apply(lambda x: time.strptime(x.__str__(), "%B de %Y"))
data.subfila = data.subfila.apply(lambda x: time.strftime('%Y-%m-%d %H:%M:%S', x))
In [ ]:
In [453]:
data.info()
In [394]:
data.columns = ['producto', 'fecha', 'pais', 'transporte', 'Ton', 'tipo','Percent']
In [395]:
data = data.loc[data.Ton>0]
In [396]:
data.to_csv(product+'_'+tipo+'_clean.csv', encoding='utf-8')
In [397]:
data = pd.read_csv(product+'_'+tipo+'_clean.csv', encoding = 'utf-8',parse_dates=['fecha'])
data.fillna(0,inplace=True)
In [398]:
data.info()
In [399]:
data.pais = data.pais.astype('category')
data.transporte = data.transporte.astype('category')
data.producto = data.producto.astype('category')
In [400]:
area = data.pivot_table(index='pais',columns='fecha',values='Ton')
area.fillna(0,inplace=True)
area['Total'] = area.sum(axis=1)
In [401]:
area.sort_values('Total',ascending=False).head()
Out[401]:
In [402]:
area.sort_values('Total',ascending=False).head(5).drop('Total',axis=1).transpose().plot.area(stacked=True)
plt.ylabel('Toneladas')
plt.xlabel('Mes')
plt.title('Evolución por país de importaciones')
plt.savefig('evolucion_pais.png')
In [403]:
data.Ton.groupby(data.pais).sum().sort_values(ascending=False).head(10).plot('barh')
plt.savefig('por_pais.png')
In [377]:
datos = data.groupby([data.fecha,data.pais]).max()
In [378]:
datos.sort_values('Ton',ascending=False,inplace=True)
datos = datos.groupby('fecha').head(1).reset_index()
datos.sort_values('fecha',ascending=True,inplace=True)
In [368]:
datos.drop('Unnamed: 0',axis=1,inplace=True)
datos.set_index('fecha',inplace=True)
In [369]:
datos
Out[369]:
In [370]:
datos.to_csv(pwd+product+'/import_pais_mes.csv',encoding='utf-8')
In [371]:
dataArg = data.loc[data.pais == ' Argentina']
dataArg.transporte = dataArg.transporte.apply(lambda x: 'Marítimo')
In [372]:
data.head()
Out[372]:
In [373]:
data.pivot_table(index='pais', columns=data.fecha.dt.year, values='kg')
In [55]:
datos = pd.melt(data,id_vars=['fecha', 'pais'],value_vars='kg')
In [56]:
max(datos.groupby('fecha'))
Out[56]:
In [ ]:
In [57]:
#from geopy.geocoders import Nominatim
#from mpl_toolkits.basemap import Basemap
#import matplotlib.cm as cm
In [190]:
#geolocator = Nominatim()
#location = geolocator.geocode("Spain")
#location[1][0]
In [191]:
#data['lat'] = data.index.tolist()
#data['lat'] = data.lat.apply(lambda x: geolocator.geocode(x)[1][0])
#data['lon'] = data.index
#data['lon'] = data.lat.apply(lambda x: geolocator.geocode(x)[1][1])
#data['geo'] = data.index
#data['geo'] = data.lat.apply(lambda x: geolocator.geocode(x))
#x = data.lon.values
#y = data.lat.values
#z = data.ACELGA.values
#map = Basemap()
#map.drawcoastlines(linewidth=0.5)
#map.scatter(x, y, marker='o',s = z)
In [ ]:
In [ ]: