In [1]:
import pandas as pd
import sys
%matplotlib inline
In [2]:
# Record the interpreter and pandas versions this notebook was run with.
# print() is called with a single parenthesised argument so the cell runs
# unchanged on both Python 2 and Python 3 (the bare `print x` statement is a
# SyntaxError on Python 3).
print('Python version ' + sys.version)
print('Pandas version ' + pd.__version__)
In [3]:
# Frame with a timestamp column ('col1') and an integer column ('col2').
# The three timestamps are deliberately written in three different string
# formats (each intended as 00:00:30 on consecutive days).
df1 = pd.DataFrame({
    'col1': list(map(pd.Timestamp, ['20130102000030',
                                    '2013-01-03 00:00:30',
                                    '1/4/2013 000030'])),
    'col2': [1, 10, 18],
})
df1
Out[3]:
In [4]:
# Promote the timestamp column to the row index so that later arithmetic
# aligns rows by time.
# NOTE: this rebinds df1, so the cell cannot simply be re-run -- a second run
# raises KeyError because 'col1' is no longer a column.
df1 = df1.set_index(keys='col1')
df1
Out[4]:
In [5]:
# Second frame: one 'col2' value per day, labelled at midnight. The index is
# named 'col1' at construction time instead of being renamed afterwards.
df2 = pd.DataFrame(
    {'col2': [22, 10, 113]},
    index=pd.Index(
        list(map(pd.Timestamp, ['20130102', '2013-01-03', '1/4/2013'])),
        name='col1',
    ),
)
df2
Out[5]:
In [6]:
# Adding the frames directly does not give the result we want: df1 is labelled
# at 00:00:30 and df2 at midnight, so no index labels match and the aligned
# sum is NaN in every row.
df2.add(df1)
Out[6]:
In [7]:
# Re-label df2 onto df1's index, filling each new label with the most recent
# earlier value from df2 (forward fill / 'pad'):
#
#   2013-01-02 00:00:00 => 22    (df2 row)
#   2013-01-02 00:00:30 => 22    (padded)
#   2013-01-03 00:00:00 => 10    (df2 row)
#   2013-01-03 00:00:30 => 10    (padded)
#   2013-01-04 00:00:00 => 113   (df2 row)
#   2013-01-04 00:00:30 => 113   (padded)
#
# Only the 00:00:30 labels (df1's index) appear in the result. This cell just
# previews the result; df2 itself is left unchanged.
df2.reindex(index=df1.index, method='pad')
Out[7]:
In [8]:
# Once df2 carries df1's timestamps the two frames align row for row,
# so the addition produces real sums.
df2 = df2.reindex(index=df1.index, method='pad')
df1.add(df2)
Out[8]:
In [9]:
# Small single-column frame (values 1..3) used as the first concat input.
df1 = pd.DataFrame(list(range(1, 4)))
df1
Out[9]:
In [10]:
# Small single-column frame (values 4..6) used as the second concat input.
df2 = pd.DataFrame(list(range(4, 7)))
df2
Out[10]:
In [11]:
# Stack df2 beneath df1. The original row labels are kept, so the labels
# 0..2 each appear twice in the result.
pd.concat([df1, df2], axis=0)
Out[11]:
In [12]:
# Left-hand frame for the merge: one 'col1' value for each of three
# consecutive days.
df1 = pd.DataFrame(
    {'col1': [22, 10, 113]},
    index=list(map(pd.Timestamp, ['1/1/2013', '1/2/2013', '1/3/2013'])),
)
df1
Out[12]:
In [13]:
# Right-hand frame for the merge: 'col2' values for the first and third day
# only (there is no row for 2013-01-02).
df2 = pd.DataFrame(
    {'col2': [5, 5]},
    index=list(map(pd.Timestamp, ['1/1/2013', '1/3/2013'])),
)
df2
Out[13]:
In [14]:
# Left join on the index: every df1 row is kept, and 2013-01-02 has no
# matching row in df2, so its 'col2' value is NaN.
pd.merge(df1, df2, how='left', left_index=True, right_index=True)
Out[14]:
Author: David Rojas