In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import os
In [2]:
# Directory that holds the input workbooks. The original hard-coded location is
# kept as the default; set the IHPC_DATA_DIR environment variable to point the
# notebook at another folder without editing this cell.
path_data = os.environ.get('IHPC_DATA_DIR', 'G://APM//Dados//EUR')
In [3]:
# Show the entries found in the data directory.
os.listdir(path_data)
Out[3]:
In [19]:
# Read the 'DADOS_AGREG' worksheet of the aggregates workbook.
#   - the first three worksheet rows are skipped,
#   - cells containing 'ND' are treated as missing values,
#   - the first column becomes the index.
# `sheet_name` is the current spelling of the keyword; the old `sheetname`
# spelling was deprecated and is rejected by recent pandas releases.
file_agreg = path_data + '/IHPC_AGREG_EUR.xlsm'
df_agreg = pd.read_excel(
    file_agreg,
    sheet_name='DADOS_AGREG',
    na_values='ND',
    skiprows=[0, 1, 2],
    index_col=0,
)
df_agreg.head()
Out[19]:
In [20]:
# Save the first row of the frame as a dictionary keyed by column label.
# NOTE(review): presumably this row holds the series names -- confirm against the workbook.
# The row itself is not removed in this cell.
agreg_names = dict(df_agreg.iloc[0])
agreg_names
Out[20]:
In [5]:
# Remove the row whose label is the first index entry, then preview the end of
# the frame.
# NOTE: this reassigns df_agreg from itself, so running the cell again drops
# one more row.
df_agreg = df_agreg.drop(labels=df_agreg.index[0], axis=0)
df_agreg.tail()
Out[5]:
In [6]:
# Boolean mask over the rows: True where the index year is 1995 or later.
mask_rows_healine = df_agreg.index.year >= 1995
# Keep only column 76451 for those rows; the list selector keeps the result a DataFrame.
# NOTE(review): 76451 is presumably the headline series -- confirm against agreg_names.
df_headline = df_agreg.loc[mask_rows_healine,[76451]]
df_headline.head()
Out[6]:
In [7]:
# Path of the sub-index workbook inside the data directory.
file_items = path_data + '/IHPC_SUBIND_EUR.xlsm'
In [8]:
# Worksheet row positions 0, 1 and 2, to be skipped when the sub-index sheet is read.
rows_to_skip = [*np.arange(3)]
#rows_to_skip += list(np.arange(5,17))
rows_to_skip
Out[8]:
In [9]:
# Read the 'DADOS_SUBIND' worksheet of the sub-index workbook.
#   - the row positions listed in rows_to_skip are skipped,
#   - cells containing 'ND' are treated as missing values,
#   - the first column becomes the index.
# `sheet_name` is the current spelling of the keyword; the old `sheetname`
# spelling was deprecated and is rejected by recent pandas releases.
df_ind_items = pd.read_excel(
    file_items,
    sheet_name='DADOS_SUBIND',
    na_values='ND',
    skiprows=rows_to_skip,
    index_col=0,
)
df_ind_items.head()
Out[9]:
In [10]:
# Keep the first row as a dictionary keyed by column label.
# NOTE(review): presumably this row holds the item names -- confirm against the workbook.
item_names = dict(df_ind_items.iloc[0])
item_names;
# Then drop that row from the frame.
# NOTE: df_ind_items is reassigned from itself, so running the cell again
# drops one more row.
df_ind_items = df_ind_items.drop(labels=df_ind_items.index[0], axis=0)
In [11]:
# Preview the first rows of the frame after the first row was dropped.
df_ind_items.head()
Out[11]:
In [12]:
## Join headline and items in a single data frame
# First step: print the (rows, columns) shape of each frame, headline first.
for frame in (df_headline, df_ind_items):
    print(frame.shape)
In [13]:
# Restrict both frames to rows whose index year is 1999 or later.
df_headline = df_headline.loc[df_headline.index.year >= 1999]
df_ind_items = df_ind_items.loc[df_ind_items.index.year >= 1999]
In [14]:
# Left-join the item columns onto the headline frame by index, so the headline
# column comes first and its rows determine the result.
# NOTE: the result is stored back into df_ind_items, so this cell is not
# idempotent -- on a second run both frames would carry the headline column.
df_ind_items = df_headline.join(df_ind_items, how='left')
df_ind_items.head()
Out[14]:
In [15]:
# Write the joined frame to 'raw_data_items.csv' (relative path), including the
# header row and the index, which is stored under the column label 'date'.
df_ind_items.to_csv('raw_data_items.csv',index=True,index_label='date',header=True)
In [16]:
# Display the (rows, columns) shape of the joined frame.
df_ind_items.shape
Out[16]:
In [17]:
# Preview the last rows of the joined frame.
df_ind_items.tail()
Out[17]:
In [ ]: