In [1]:
import pandas as pd
import sys
%matplotlib inline

In [2]:
print 'Python version ' + sys.version
print 'Pandas version ' + pd.__version__


Python version 2.7.5 |Anaconda 2.1.0 (64-bit)| (default, Jul  1 2013, 12:37:52) [MSC v.1500 64 bit (AMD64)]
Pandas version 0.15.2

Merge

I have two dataframes that have dates as their index. The problem is that one of the dataframes has a timestamp and this is preventing me from adding the dataframes together. How can I match up the time stamps?


In [3]:
df1 = pd.DataFrame({'col1':[pd.Timestamp('20130102000030'),
                         pd.Timestamp('2013-01-03 00:00:30'),
                         pd.Timestamp('1/4/2013 000030')],
                 'col2':[1,10,18]
                 })
df1


Out[3]:
col1 col2
0 2013-01-02 00:00:30 1
1 2013-01-03 00:00:30 10
2 2013-01-04 00:00:30 18

In [4]:
df1 = df1.set_index('col1')
df1


Out[4]:
col2
col1
2013-01-02 00:00:30 1
2013-01-03 00:00:30 10
2013-01-04 00:00:30 18

In [5]:
d = {'col2':[22,10,113]}

i = [pd.Timestamp('20130102'),
     pd.Timestamp('2013-01-03'),
     pd.Timestamp('1/4/2013')]
                 

df2 = pd.DataFrame(data=d, index = i)
df2.index.name = 'col1'
df2


Out[5]:
col2
col1
2013-01-02 22
2013-01-03 10
2013-01-04 113

In [6]:
# If we try to add the data frames together, we do not get the results we want.
df2+df1


Out[6]:
col2
col1
2013-01-02 00:00:00 NaN
2013-01-02 00:00:30 NaN
2013-01-03 00:00:00 NaN
2013-01-03 00:00:30 NaN
2013-01-04 00:00:00 NaN
2013-01-04 00:00:30 NaN

In [7]:
# Make the index of df2 the same as the index of df1
# Fill the missing values with previous known value
#
#2013-01-02 00:00:00 => 22
#2013-01-02 00:00:30 => 22 
#2013-01-03 00:00:00 => 10
#2013-01-03 00:00:00 => 10
#2013-01-04 00:00:00 => 113
#2013-01-04 00:00:00 => 113
df2.reindex(df1.index, method='pad')


Out[7]:
col2
col1
2013-01-02 00:00:30 22
2013-01-03 00:00:30 10
2013-01-04 00:00:30 113

In [8]:
# Now we can add them
df2 = df2.reindex(df1.index, method='pad')
df1+df2


Out[8]:
col2
col1
2013-01-02 00:00:30 23
2013-01-03 00:00:30 20
2013-01-04 00:00:30 131

How do I add two dataframes together by row?


In [9]:
df1 = pd.DataFrame([1,2,3])
df1


Out[9]:
0
0 1
1 2
2 3

In [10]:
df2 = pd.DataFrame([4,5,6])
df2


Out[10]:
0
0 4
1 5
2 6

In [11]:
pd.concat([df1,df2])


Out[11]:
0
0 1
1 2
2 3
0 4
1 5
2 6

How do I join two data frames by index?


In [12]:
d = {'col1':[22,10,113]}

i = [pd.Timestamp('1/1/2013'),
     pd.Timestamp('1/2/2013'),
     pd.Timestamp('1/3/2013')]
                 

df1 = pd.DataFrame(data=d, index = i)
df1


Out[12]:
col1
2013-01-01 22
2013-01-02 10
2013-01-03 113

In [13]:
d = {'col2':[5,5]}

i = [pd.Timestamp('1/1/2013'),
     pd.Timestamp('1/3/2013')]
                 

df2 = pd.DataFrame(data=d, index = i)
df2


Out[13]:
col2
2013-01-01 5
2013-01-03 5

In [14]:
df1.merge(df2, left_index=True, right_index=True, how='left')


Out[14]:
col1 col2
2013-01-01 22 5
2013-01-02 10 NaN
2013-01-03 113 5

Author: David Rojas