get-raw-data



In [1]:
%matplotlib inline

import numpy as np
import pandas as pd

import os

In [4]:
path_data = 'G:/APM/Dados/MONET FINANC/Indicadores inflação/InfSwaps.xlsx'

In [5]:
df_raw = pd.read_excel(path_data,sheetname='data',skiprows=[0,1,3,4],index_col=0)
df_raw.head()


Out[5]:
1Y 2Y 3Y 4Y 5Y 6Y 7Y 8Y 9Y 10Y ... Unnamed: 17 Unnamed: 18 Unnamed: 19 Unnamed: 20 Unnamed: 21 Unnamed: 22 Unnamed: 23 Unnamed: 24 Unnamed: 25 Unnamed: 26
2004-06-22 2.165 2.2600 2.315 2.3350 2.3800 2.3850 2.4200 2.4100 2.4300 2.4700 ... 2.165 2.355088 2.425089 2.395023 2.560198 2.410004 2.630251 2.340027 2.590141 2.830704
2004-06-23 2.185 2.2575 2.270 2.2925 2.3475 2.3650 2.3925 2.4025 2.4225 2.4325 ... 2.185 2.330051 2.295005 2.360030 2.567796 2.452545 2.557655 2.472527 2.582641 2.522544
2004-06-24 2.155 2.2000 2.245 2.2675 2.3000 2.3475 2.3500 2.3900 2.4100 2.4100 ... 2.155 2.245020 2.335059 2.335030 2.430103 2.585331 2.365001 2.670438 2.570141 2.410000
2004-06-25 2.155 2.2200 2.245 2.2675 2.3200 2.3475 2.3700 2.3900 2.4100 2.4200 ... 2.155 2.285041 2.295018 2.335030 2.530270 2.485111 2.505104 2.530109 2.570141 2.510044
2004-06-28 2.095 2.2200 2.260 2.2775 2.3200 2.3525 2.3700 2.3875 2.4075 2.4300 ... 2.095 2.345153 2.340047 2.330018 2.490177 2.515155 2.475063 2.510084 2.567641 2.632723

5 rows × 26 columns


In [12]:
df_raw.to_csv('raw_data.csv',header=True,index=True,index_label='date')

In [6]:
df_raw0 = df_raw

In [7]:
df_raw = df_raw0

In [8]:
cmask = [cname for cname in df_raw.columns if 'Unnamed' not in cname]
df_raw = df_raw.loc[:,cmask]
df_raw.head()


Out[8]:
1Y 2Y 3Y 4Y 5Y 6Y 7Y 8Y 9Y 10Y 12Y 15Y 20Y 25Y 30Y
2004-06-22 2.165 2.2600 2.315 2.3350 2.3800 2.3850 2.4200 2.4100 2.4300 2.4700 2.5000 2.550 2.6300 2.710 2.7200
2004-06-23 2.185 2.2575 2.270 2.2925 2.3475 2.3650 2.3925 2.4025 2.4225 2.4325 2.4825 2.545 2.6225 2.695 2.7200
2004-06-24 2.155 2.2000 2.245 2.2675 2.3000 2.3475 2.3500 2.3900 2.4100 2.4100 2.4400 2.500 2.6000 2.680 2.7100
2004-06-25 2.155 2.2200 2.245 2.2675 2.3200 2.3475 2.3700 2.3900 2.4100 2.4200 2.4600 2.510 2.6200 2.690 2.7188
2004-06-28 2.095 2.2200 2.260 2.2775 2.3200 2.3525 2.3700 2.3875 2.4075 2.4300 2.4600 2.520 2.6200 2.700 2.7200

tt = [cname.split()[0].split('EUSWI')[1] for cname in df_raw.columns if 'CMPN' in cname ] len(tt)

df_raw.columns = [c.split()[0].split('EUSWI')[1] + 'y' for c in df_raw.columns]


In [11]:
df_raw.tail()


Out[11]:
1Y 2Y 3Y 4Y 5Y 6Y 7Y 8Y 9Y 10Y 12Y 15Y 20Y 25Y 30Y
2017-12-13 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2017-12-14 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2017-12-15 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2017-12-18 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2017-12-19 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

In [97]:
def getForward(v,t1=1,t2=2):
    return (np.power(np.power(1+v[1]/100,t2)/np.power(1+v[0]/100,t1),1/(t2-t1))-1)*100

In [98]:
ind1 = 0
ind2 = 1
v2 = df_raw.iloc[-1,ind2]
v1 = df_raw.iloc[-1,ind1]
t1 = int(df_raw.columns[ind1].strip('y'))
t2 = int(df_raw.columns[ind2].strip('y'))
print('v1 is {}, v2 is {}'.format(v1,v2))


v1 is 0.613, v2 is 0.628

In [99]:
v = [v1,v2]
f = getForward(v,t1,t2)

In [100]:
f


Out[100]:
0.64300223629154551

In [101]:
df_raw['1yf1y'] = pd.Series('',index = df_raw.index)
df_raw.tail()


Out[101]:
1y 2y 3y 4y 5y 6y 7y 8y 9y 10y 12y 15y 20y 25y 30y 1yf1y
Ticker/ISIN
2016-04-11 0.595 0.628 0.665 0.691 0.7500 0.795 0.875 0.945 1.013 1.088 1.188 1.282 1.398 1.465 1.538
2016-04-12 0.598 0.633 0.667 0.705 0.7550 0.800 0.875 0.940 1.005 1.080 1.185 1.293 1.398 1.465 1.543
2016-04-13 0.608 0.628 0.660 0.703 0.7550 0.810 0.875 0.940 1.008 1.080 1.180 1.288 1.388 1.464 1.539
2016-04-14 0.625 0.640 0.668 0.702 0.7425 0.803 0.873 0.935 1.000 1.075 1.175 1.269 1.383 1.453 1.529
2016-04-15 0.613 0.628 0.663 0.702 0.7500 0.803 0.870 0.935 1.003 1.078 1.180 1.283 1.388 1.455 1.532

In [102]:
df_raw['1yf1y'] = df_raw[['1y','2y']].apply(getForward,axis=1)

In [103]:
df_raw['1yf1y'] = df_raw[['1y','2y']].apply(lambda x: getForward(x,t1=1,t2=2),axis=1)

In [104]:
df_raw.tail()


Out[104]:
1y 2y 3y 4y 5y 6y 7y 8y 9y 10y 12y 15y 20y 25y 30y 1yf1y
Ticker/ISIN
2016-04-11 0.595 0.628 0.665 0.691 0.7500 0.795 0.875 0.945 1.013 1.088 1.188 1.282 1.398 1.465 1.538 0.661011
2016-04-12 0.598 0.633 0.667 0.705 0.7550 0.800 0.875 0.940 1.005 1.080 1.185 1.293 1.398 1.465 1.543 0.668012
2016-04-13 0.608 0.628 0.660 0.703 0.7550 0.810 0.875 0.940 1.008 1.080 1.180 1.288 1.388 1.464 1.539 0.648004
2016-04-14 0.625 0.640 0.668 0.702 0.7425 0.803 0.873 0.935 1.000 1.075 1.175 1.269 1.383 1.453 1.529 0.655002
2016-04-15 0.613 0.628 0.663 0.702 0.7500 0.803 0.870 0.935 1.003 1.078 1.180 1.283 1.388 1.455 1.532 0.643002

In [105]:
def AddForward(df,t1=1,t2=2):
    
    #add an empty column
    df[str(t2-t1)+'yf'+str(t1)+'y'] = pd.Series('',index = df.index)
    df[str(t2-t1)+'yf'+str(t1)+'y'] = df[[str(t1)+'y',str(t2)+'y']].apply(lambda x: getForward(x,t1,t2),axis=1)

In [110]:
AddForward(df_raw,t1=3,t2=4)

In [111]:
df_raw.tail()


Out[111]:
1y 2y 3y 4y 5y 6y 7y 8y 9y 10y 12y 15y 20y 25y 30y 1yf1y 2yf1y 1yf2y 1yf3y
Ticker/ISIN
2016-04-11 0.595 0.628 0.665 0.691 0.7500 0.795 0.875 0.945 1.013 1.088 1.188 1.282 1.398 1.465 1.538 0.661011 0.700018 0.739041 0.769040
2016-04-12 0.598 0.633 0.667 0.705 0.7550 0.800 0.875 0.940 1.005 1.080 1.185 1.293 1.398 1.465 1.543 0.668012 0.701518 0.735034 0.819086
2016-04-13 0.608 0.628 0.660 0.703 0.7550 0.810 0.875 0.940 1.008 1.080 1.180 1.288 1.388 1.464 1.539 0.648004 0.686010 0.724031 0.832110
2016-04-14 0.625 0.640 0.668 0.702 0.7425 0.803 0.873 0.935 1.000 1.075 1.175 1.269 1.383 1.453 1.529 0.655002 0.689507 0.724023 0.804069
2016-04-15 0.613 0.628 0.663 0.702 0.7500 0.803 0.870 0.935 1.003 1.078 1.180 1.283 1.388 1.455 1.532 0.643002 0.688009 0.733037 0.819091

In [82]:
t1,t2=1,3

In [86]:
#df[str(t2-t1)+'yf'+str(t1)+'y'] = pd.Series('',index = df.index)
df.tail()


Out[86]:
1y 2y 3y 4y 5y 6y 7y 8y 9y 10y 12y 15y 20y 25y 30y 1yf1y 2yf1y
Ticker/ISIN
2016-04-11 0.595 0.628 0.665 0.691 0.7500 0.795 0.875 0.945 1.013 1.088 1.188 1.282 1.398 1.465 1.538 0.644504
2016-04-12 0.598 0.633 0.667 0.705 0.7550 0.8 0.875 0.94 1.005 1.080 1.185 1.293 1.398 1.465 1.543 0.650505
2016-04-13 0.608 0.628 0.66 0.703 0.7550 0.81 0.875 0.94 1.008 1.080 1.180 1.288 1.388 1.464 1.539 0.638001
2016-04-14 0.625 0.640 0.668 0.702 0.7425 0.803 0.873 0.935 1 1.075 1.175 1.269 1.383 1.453 1.529 0.647501
2016-04-15 0.613 0.628 0.663 0.702 0.7500 0.803 0.870 0.935 1.003 1.078 1.180 1.283 1.388 1.455 1.532 0.635501

df_raw['2yf1y'] = df_raw[['1y','3y']].apply(lambda x: getForward(x,t1=1,t2=3),axis=1)


In [88]:
df_raw[['1y','3y']]


Out[88]:
1y 3y
Ticker/ISIN
2004-06-21 2.1750 #N/A N.A.
2004-06-22 2.1650 2.315
2004-06-23 2.1850 2.27
2004-06-24 2.1550 2.245
2004-06-25 2.1550 2.245
2004-06-28 2.0950 2.26
2004-06-29 2.0950 2.1575
2004-06-30 2.0350 2.26
2004-07-01 2.0550 2.15
2004-07-02 2.0450 2.165
2004-07-05 2.0550 2.195
2004-07-06 2.0850 2.1925
2004-07-07 2.0750 2.165
2004-07-08 2.0750 2.1675
2004-07-09 2.0750 2.18
2004-07-12 2.0650 2.2
2004-07-13 2.0750 2.19
2004-07-14 2.0950 2.19
2004-07-15 2.0950 2.215
2004-07-16 2.0950 2.1975
2004-07-19 2.0950 2.1975
2004-07-20 2.0650 2.185
2004-07-21 2.0850 2.185
2004-07-22 2.0950 2.2
2004-07-23 2.0450 2.18
2004-07-26 2.0850 2.155
2004-07-27 2.0950 2.23
2004-07-28 2.1450 2.21
2004-07-29 2.0150 2.135
2004-07-30 2.0150 2.1175
... ... ...
2016-03-07 0.3300 0.598
2016-03-08 0.3480 0.59
2016-03-09 0.3500 0.593
2016-03-10 0.3250 0.583
2016-03-11 0.3130 0.575
2016-03-14 0.2800 0.555
2016-03-15 0.2400 0.528
2016-03-16 0.2580 0.528
2016-03-17 0.2950 0.545
2016-03-18 0.3105 0.5575
2016-03-21 0.3050 0.55
2016-03-22 0.2880 0.53
2016-03-23 0.2837 0.5255
2016-03-24 0.2812 0.5125
2016-03-25 0.2860 0.517
2016-03-28 0.2812 0.507
2016-03-29 0.2925 0.4975
2016-03-30 0.3355 0.5175
2016-03-31 0.2480 0.503
2016-04-01 0.5550 0.633
2016-04-04 0.5730 0.645
2016-04-05 0.5580 0.635
2016-04-06 0.5905 0.658
2016-04-07 0.5700 0.64
2016-04-08 0.6037 0.668
2016-04-11 0.5950 0.665
2016-04-12 0.5980 0.667
2016-04-13 0.6080 0.66
2016-04-14 0.6250 0.668
2016-04-15 0.6130 0.663

3085 rows × 2 columns


In [85]:
df[str(t2-t1)+'yf'+str(t1)+'y'] = df[[str(t1)+'y',str(t2)+'y']].apply(lambda x: getForward(x,t1,t2),axis=1)


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-85-d9e28648b267> in <module>()
----> 1 df[str(t2-t1)+'yf'+str(t1)+'y'] = df[[str(t1)+'y',str(t2)+'y']].apply(lambda x: getForward(x,t1,t2),axis=1)

C:\Users\EEU227\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\frame.py in apply(self, func, axis, broadcast, raw, reduce, args, **kwds)
   4040                     if reduce is None:
   4041                         reduce = True
-> 4042                     return self._apply_standard(f, axis, reduce=reduce)
   4043             else:
   4044                 return self._apply_broadcast(f, axis)

C:\Users\EEU227\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\frame.py in _apply_standard(self, func, axis, ignore_failures, reduce)
   4136             try:
   4137                 for i, v in enumerate(series_gen):
-> 4138                     results[i] = func(v)
   4139                     keys.append(v.name)
   4140             except Exception as e:

<ipython-input-85-d9e28648b267> in <lambda>(x)
----> 1 df[str(t2-t1)+'yf'+str(t1)+'y'] = df[[str(t1)+'y',str(t2)+'y']].apply(lambda x: getForward(x,t1,t2),axis=1)

<ipython-input-52-bba463230456> in getForward(v, t1, t2)
      1 def getForward(v,t1=1,t2=2):
----> 2     return (np.power(np.power(1+v[1]/100,t2)/np.power(1+v[0]/100,t1),1/(t2-t1))-1)*100

TypeError: ("unsupported operand type(s) for /: 'str' and 'int'", 'occurred at index 2004-06-21 00:00:00')

In [ ]: