In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from pandas import DataFrame, Series
from datetime import datetime

In [2]:
# Three random observations, one per Wednesday, starting 2012-06-13.
ts1 = Series(
    data=np.random.randn(3),
    index=pd.date_range(start='2012-6-13', periods=3, freq='W-WED'),
)
ts1


Out[2]:
2012-06-13    0.061793
2012-06-20    0.895383
2012-06-27    0.846934
Freq: W-WED, dtype: float64

In [3]:
# Upsample the weekly series to business-day frequency; the newly created
# days have no observation, so their mean is NaN.
ts1.resample('B').mean()


Out[3]:
2012-06-13    0.061793
2012-06-14         NaN
2012-06-15         NaN
2012-06-18         NaN
2012-06-19         NaN
2012-06-20    0.895383
2012-06-21         NaN
2012-06-22         NaN
2012-06-25         NaN
2012-06-26         NaN
2012-06-27    0.846934
Freq: B, dtype: float64

In [4]:
# Upsample to calendar-day frequency, carrying each weekly value forward
# until the next observation.
ts1.resample('D').ffill()


Out[4]:
2012-06-13    0.061793
2012-06-14    0.061793
2012-06-15    0.061793
2012-06-16    0.061793
2012-06-17    0.061793
2012-06-18    0.061793
2012-06-19    0.061793
2012-06-20    0.895383
2012-06-21    0.895383
2012-06-22    0.895383
2012-06-23    0.895383
2012-06-24    0.895383
2012-06-25    0.895383
2012-06-26    0.895383
2012-06-27    0.846934
Freq: D, dtype: float64

In [5]:
# Six irregularly spaced timestamps in June 2012, paired with random values.
dates = pd.DatetimeIndex([
    '2012-6-12', '2012-6-17', '2012-6-18',
    '2012-6-21', '2012-6-22', '2012-6-29',
])
ts2 = Series(data=np.random.randn(6), index=dates)
ts2


Out[5]:
2012-06-12    1.341571
2012-06-17    0.416088
2012-06-18   -0.893586
2012-06-21    0.517002
2012-06-22   -0.306397
2012-06-29   -0.787403
dtype: float64

In [6]:
# Rebind ts1 to its daily, forward-filled version.
ts1 = ts1.resample('D').ffill()
# Align ts1 to ts2's irregular dates, carrying the most recent value forward.
# A date earlier than ts1's first timestamp has nothing to fill from and is NaN.
ts1.reindex(ts2.index, method='ffill')


Out[6]:
2012-06-12         NaN
2012-06-17    0.061793
2012-06-18    0.061793
2012-06-21    0.895383
2012-06-22    0.895383
2012-06-29    0.846934
dtype: float64

In [7]:
# Add the two series on ts2's dates; NaN from the reindex propagates into the sum.
ts2 + ts1.reindex(ts2.index, method='ffill')


Out[7]:
2012-06-12         NaN
2012-06-17    0.477881
2012-06-18   -0.831793
2012-06-21    1.412385
2012-06-22    0.588985
2012-06-29    0.059531
dtype: float64

In [8]:
# Quarterly series indexed by periods of a fiscal year that ends in September.
gdp = Series([1.78, 1.94, 2.08, 2.01, 2.15, 2.31, 2.46],
             index=pd.period_range('1984Q2', periods=7, freq='Q-SEP'))
# Annual series indexed by calendar-year periods. 'Y' is the annual period
# alias (year ending in December by default); it replaces the deprecated
# 'A-DEC' spelling.
infl = Series([0.025, 0.045, 0.037, 0.04],
              index=pd.period_range('1982', periods=4, freq='Y'))
gdp


Out[8]:
1984Q2    1.78
1984Q3    1.94
1984Q4    2.08
1985Q1    2.01
1985Q2    2.15
1985Q3    2.31
1985Q4    2.46
Freq: Q-SEP, dtype: float64

In [9]:
# Display the annual-frequency series.
infl


Out[9]:
1982    0.025
1983    0.045
1984    0.037
1985    0.040
Freq: A-DEC, dtype: float64

In [10]:
# Convert each annual period to the Q-SEP quarter in which that year ENDS
# (how='end'); e.g. calendar 1982 maps to fiscal quarter 1983Q1.
infl_q = infl.asfreq('Q-SEP', how='end')
infl_q


Out[10]:
1983Q1    0.025
1984Q1    0.045
1985Q1    0.037
1986Q1    0.040
Freq: Q-SEP, dtype: float64

In [11]:
# Align the once-a-year values to gdp's quarterly index, forward-filling so
# every quarter carries the most recent value.
infl_q.reindex(gdp.index, method='ffill')


Out[11]:
1984Q2    0.045
1984Q3    0.045
1984Q4    0.045
1985Q1    0.037
1985Q2    0.037
1985Q3    0.037
1985Q4    0.037
Freq: Q-SEP, dtype: float64

In [12]:
# Build a minute-frequency range covering a single trading day (09:30-15:59).
# 'min' is the minute alias; the older 'T' spelling is deprecated.
rng = pd.date_range('2012-06-01 09:30', '2012-06-01 15:59', freq='min')

# Append the same intraday times shifted by 1, 2 and 3 business days,
# giving 4 trading days in total (09:30-15:59 on each).
rng = rng.append([rng + pd.offsets.BDay(i) for i in range(1, 4)])

# Values are simply the running position 0, 1, 2, ... as floats.
ts = Series(np.arange(len(rng), dtype=float), index=rng)
ts


Out[12]:
2012-06-01 09:30:00       0.0
2012-06-01 09:31:00       1.0
2012-06-01 09:32:00       2.0
2012-06-01 09:33:00       3.0
2012-06-01 09:34:00       4.0
2012-06-01 09:35:00       5.0
2012-06-01 09:36:00       6.0
2012-06-01 09:37:00       7.0
2012-06-01 09:38:00       8.0
2012-06-01 09:39:00       9.0
2012-06-01 09:40:00      10.0
2012-06-01 09:41:00      11.0
2012-06-01 09:42:00      12.0
2012-06-01 09:43:00      13.0
2012-06-01 09:44:00      14.0
2012-06-01 09:45:00      15.0
2012-06-01 09:46:00      16.0
2012-06-01 09:47:00      17.0
2012-06-01 09:48:00      18.0
2012-06-01 09:49:00      19.0
2012-06-01 09:50:00      20.0
2012-06-01 09:51:00      21.0
2012-06-01 09:52:00      22.0
2012-06-01 09:53:00      23.0
2012-06-01 09:54:00      24.0
2012-06-01 09:55:00      25.0
2012-06-01 09:56:00      26.0
2012-06-01 09:57:00      27.0
2012-06-01 09:58:00      28.0
2012-06-01 09:59:00      29.0
                        ...  
2012-06-06 15:30:00    1530.0
2012-06-06 15:31:00    1531.0
2012-06-06 15:32:00    1532.0
2012-06-06 15:33:00    1533.0
2012-06-06 15:34:00    1534.0
2012-06-06 15:35:00    1535.0
2012-06-06 15:36:00    1536.0
2012-06-06 15:37:00    1537.0
2012-06-06 15:38:00    1538.0
2012-06-06 15:39:00    1539.0
2012-06-06 15:40:00    1540.0
2012-06-06 15:41:00    1541.0
2012-06-06 15:42:00    1542.0
2012-06-06 15:43:00    1543.0
2012-06-06 15:44:00    1544.0
2012-06-06 15:45:00    1545.0
2012-06-06 15:46:00    1546.0
2012-06-06 15:47:00    1547.0
2012-06-06 15:48:00    1548.0
2012-06-06 15:49:00    1549.0
2012-06-06 15:50:00    1550.0
2012-06-06 15:51:00    1551.0
2012-06-06 15:52:00    1552.0
2012-06-06 15:53:00    1553.0
2012-06-06 15:54:00    1554.0
2012-06-06 15:55:00    1555.0
2012-06-06 15:56:00    1556.0
2012-06-06 15:57:00    1557.0
2012-06-06 15:58:00    1558.0
2012-06-06 15:59:00    1559.0
dtype: float64

In [13]:
from datetime import time
# Indexing with a datetime.time selects the rows stamped 10:00 on every day.
ts[time(10, 0)]


Out[13]:
2012-06-01 10:00:00      30.0
2012-06-04 10:00:00     420.0
2012-06-05 10:00:00     810.0
2012-06-06 10:00:00    1200.0
dtype: float64

In [14]:
# Explicit method form of the time-of-day selection: rows stamped 10:00 on each day.
ts.at_time(time(10, 0))


Out[14]:
2012-06-01 10:00:00      30.0
2012-06-04 10:00:00     420.0
2012-06-05 10:00:00     810.0
2012-06-06 10:00:00    1200.0
dtype: float64

In [15]:
# Rows whose time of day lies between 10:00 and 10:01; both endpoints are included.
ts.between_time(time(10, 0), time(10, 1))


Out[15]:
2012-06-01 10:00:00      30.0
2012-06-01 10:01:00      31.0
2012-06-04 10:00:00     420.0
2012-06-04 10:01:00     421.0
2012-06-05 10:00:00     810.0
2012-06-05 10:01:00     811.0
2012-06-06 10:00:00    1200.0
2012-06-06 10:01:00    1201.0
dtype: float64

In [16]:
# Randomly set most of the series to NaN: every position except 700 randomly
# chosen ones is blanked out.
indexer = np.sort(np.random.permutation(len(ts))[700:])
irr_ts = ts.copy()
# `indexer` holds integer positions, not index labels, so assign through
# .iloc. Plain irr_ts[indexer] relies on the deprecated positional fallback
# of Series.__setitem__ on a non-integer index.
irr_ts.iloc[indexer] = np.nan
irr_ts['2012-06-01 09:50':'2012-06-01 10:00']


Out[16]:
2012-06-01 09:50:00    20.0
2012-06-01 09:51:00    21.0
2012-06-01 09:52:00    22.0
2012-06-01 09:53:00     NaN
2012-06-01 09:54:00     NaN
2012-06-01 09:55:00    25.0
2012-06-01 09:56:00     NaN
2012-06-01 09:57:00     NaN
2012-06-01 09:58:00     NaN
2012-06-01 09:59:00     NaN
2012-06-01 10:00:00     NaN
dtype: float64

In [17]:
# 10:00 on four consecutive business days.
selection = pd.date_range('2012-06-01 10:00', periods=4, freq='B')
# For each requested timestamp, take the last non-NaN value at or before it.
irr_ts.asof(selection)


Out[17]:
2012-06-01 10:00:00      25.0
2012-06-04 10:00:00     420.0
2012-06-05 10:00:00     809.0
2012-06-06 10:00:00    1198.0
Freq: B, dtype: float64

In [18]:
# Two overlapping daily frames: data1 is all 1.0 starting 2012-06-12,
# data2 is all 2.0 starting one day later.
data1 = DataFrame(np.ones((6, 3), dtype=float),
                  columns=['a', 'b', 'c'],
                  index=pd.date_range('6/12/2012', periods=6))
data2 = DataFrame(np.ones((6, 3), dtype=float) * 2,
                  columns=['a', 'b', 'c'],
                  index=pd.date_range('6/13/2012', periods=6))
# Splice the sources at 2012-06-15: data1 up to and including 06-14, data2
# from 06-15 onward. Label slicing uses .loc (.ix has been removed from pandas).
spliced = pd.concat([data1.loc[:'2012-06-14'], data2.loc['2012-06-15':]])
spliced


Out[18]:
a b c
2012-06-12 1.0 1.0 1.0
2012-06-13 1.0 1.0 1.0
2012-06-14 1.0 1.0 1.0
2012-06-15 2.0 2.0 2.0
2012-06-16 2.0 2.0 2.0
2012-06-17 2.0 2.0 2.0
2012-06-18 2.0 2.0 2.0

In [20]:
# Plain concatenation keeps every row from both frames, so the overlapping
# dates appear twice in the resulting index.
pd.concat([data1, data2])


Out[20]:
a b c
2012-06-12 1.0 1.0 1.0
2012-06-13 1.0 1.0 1.0
2012-06-14 1.0 1.0 1.0
2012-06-15 1.0 1.0 1.0
2012-06-16 1.0 1.0 1.0
2012-06-17 1.0 1.0 1.0
2012-06-13 2.0 2.0 2.0
2012-06-14 2.0 2.0 2.0
2012-06-15 2.0 2.0 2.0
2012-06-16 2.0 2.0 2.0
2012-06-17 2.0 2.0 2.0
2012-06-18 2.0 2.0 2.0

In [22]:
# Rebuild data2 with an extra column 'd' that data1 does not have.
data2 = DataFrame(np.ones((6, 4), dtype=float) * 2,
                  columns=['a', 'b', 'c', 'd'],
                  index=pd.date_range('6/13/2012', periods=6))
# Splice at 2012-06-15 as before; rows taken from data1 get NaN in 'd'.
# Label slicing uses .loc (.ix has been removed from pandas).
spliced = pd.concat([data1.loc[:'2012-06-14'], data2.loc['2012-06-15':]])
spliced


Out[22]:
a b c d
2012-06-12 1.0 1.0 1.0 NaN
2012-06-13 1.0 1.0 1.0 NaN
2012-06-14 1.0 1.0 1.0 NaN
2012-06-15 2.0 2.0 2.0 2.0
2012-06-16 2.0 2.0 2.0 2.0
2012-06-17 2.0 2.0 2.0 2.0
2012-06-18 2.0 2.0 2.0 2.0

In [23]:
# Fill the NaN cells of spliced from data2 where data2 has a matching
# row/column label; rows absent from data2 stay NaN. Returns a new frame.
spliced_filled = spliced.combine_first(data2)
spliced_filled


Out[23]:
a b c d
2012-06-12 1.0 1.0 1.0 NaN
2012-06-13 1.0 1.0 1.0 2.0
2012-06-14 1.0 1.0 1.0 2.0
2012-06-15 2.0 2.0 2.0 2.0
2012-06-16 2.0 2.0 2.0 2.0
2012-06-17 2.0 2.0 2.0 2.0
2012-06-18 2.0 2.0 2.0 2.0

In [24]:
# Same fill, but modifying spliced in place; overwrite=False restricts the
# update to cells that are currently NaN.
spliced.update(data2, overwrite=False)
spliced


Out[24]:
a b c d
2012-06-12 1.0 1.0 1.0 NaN
2012-06-13 1.0 1.0 1.0 2.0
2012-06-14 1.0 1.0 1.0 2.0
2012-06-15 2.0 2.0 2.0 2.0
2012-06-16 2.0 2.0 2.0 2.0
2012-06-17 2.0 2.0 2.0 2.0
2012-06-18 2.0 2.0 2.0 2.0

In [25]:
# Work on a copy so spliced itself is left untouched.
cp_spliced = spliced.copy()
# Replace columns 'a' and 'c' with data1's values. Assignment aligns on the
# index, so a date that data1 does not cover becomes NaN in those columns.
cp_spliced[['a', 'c']] = data1[['a', 'c']]
cp_spliced


Out[25]:
a b c d
2012-06-12 1.0 1.0 1.0 NaN
2012-06-13 1.0 1.0 1.0 2.0
2012-06-14 1.0 1.0 1.0 2.0
2012-06-15 1.0 2.0 1.0 2.0
2012-06-16 1.0 2.0 1.0 2.0
2012-06-17 1.0 2.0 1.0 2.0
2012-06-18 NaN 2.0 NaN 2.0

In [ ]:


In [ ]: