notebook.community

Edit and run



In [1]:

    
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from pandas import DataFrame, Series
from datetime import datetime



In [2]:

    
# Three random observations stamped on consecutive Wednesdays,
# starting 2012-06-13.
wednesdays = pd.date_range(start='2012-6-13', periods=3, freq='W-WED')
ts1 = Series(np.random.randn(len(wednesdays)), index=wednesdays)
ts1









    Out[2]:





2012-06-13    0.061793
2012-06-20    0.895383
2012-06-27    0.846934
Freq: W-WED, dtype: float64



In [3]:

    
# Upsample the weekly series to business-day frequency. Business days that
# have no observation come back as NaN, since mean() has nothing to aggregate.
ts1.resample('B').mean()









    Out[3]:





2012-06-13    0.061793
2012-06-14         NaN
2012-06-15         NaN
2012-06-18         NaN
2012-06-19         NaN
2012-06-20    0.895383
2012-06-21         NaN
2012-06-22         NaN
2012-06-25         NaN
2012-06-26         NaN
2012-06-27    0.846934
Freq: B, dtype: float64



In [4]:

    
# Upsample to calendar-day frequency, carrying each weekly value forward
# until the next observation replaces it.
ts1.resample('D').ffill()









    Out[4]:





2012-06-13    0.061793
2012-06-14    0.061793
2012-06-15    0.061793
2012-06-16    0.061793
2012-06-17    0.061793
2012-06-18    0.061793
2012-06-19    0.061793
2012-06-20    0.895383
2012-06-21    0.895383
2012-06-22    0.895383
2012-06-23    0.895383
2012-06-24    0.895383
2012-06-25    0.895383
2012-06-26    0.895383
2012-06-27    0.846934
Freq: D, dtype: float64



In [5]:

    
# Six irregularly spaced dates with no fixed frequency.
dates = pd.DatetimeIndex([
    '2012-6-12', '2012-6-17', '2012-6-18',
    '2012-6-21', '2012-6-22', '2012-6-29',
])
ts2 = Series(np.random.randn(len(dates)), index=dates)
ts2









    Out[5]:





2012-06-12    1.341571
2012-06-17    0.416088
2012-06-18   -0.893586
2012-06-21    0.517002
2012-06-22   -0.306397
2012-06-29   -0.787403
dtype: float64



In [6]:

    
# NOTE: this rebinds ts1 to its daily, forward-filled version; every later
# cell that reads ts1 sees the daily series, not the original weekly one.
ts1 = ts1.resample('D').ffill()
# Look ts1 up at ts2's irregular dates, taking the latest earlier value when
# a date is not in ts1. Dates before ts1's first observation stay NaN.
ts1.reindex(ts2.index, method='ffill')









    Out[6]:





2012-06-12         NaN
2012-06-17    0.061793
2012-06-18    0.061793
2012-06-21    0.895383
2012-06-22    0.895383
2012-06-29    0.846934
dtype: float64



In [7]:

    
# Align ts1 onto ts2's dates by forward fill before adding, so the sum is
# defined wherever ts1 has a value at or before the date. 2012-06-12
# precedes ts1's first observation, so the result is NaN there.
ts2 + ts1.reindex(ts2.index, method='ffill')









    Out[7]:





2012-06-12         NaN
2012-06-17    0.477881
2012-06-18   -0.831793
2012-06-21    1.412385
2012-06-22    0.588985
2012-06-29    0.059531
dtype: float64



In [8]:

    
# Quarterly GDP figures on a fiscal calendar whose year ends in September.
gdp = Series(
    [1.78, 1.94, 2.08, 2.01, 2.15, 2.31, 2.46],
    index=pd.period_range(start='1984Q2', periods=7, freq='Q-SEP'),
)

# Annual inflation figures on a calendar whose year ends in December.
# NOTE(review): recent pandas releases appear to deprecate the 'A-DEC'
# spelling in favour of 'Y-DEC' -- confirm against the pandas version in use.
infl = Series(
    [0.025, 0.045, 0.037, 0.04],
    index=pd.period_range(start='1982', periods=4, freq='A-DEC'),
)
gdp









    Out[8]:





1984Q2    1.78
1984Q3    1.94
1984Q4    2.08
1985Q1    2.01
1985Q2    2.15
1985Q3    2.31
1985Q4    2.46
Freq: Q-SEP, dtype: float64



In [9]:

    
# Display the annual inflation series for comparison with the quarterly gdp.
infl









    Out[9]:





1982    0.025
1983    0.045
1984    0.037
1985    0.040
Freq: A-DEC, dtype: float64



In [10]:

    
# Re-express each annual period as the Q-SEP fiscal quarter that contains the
# end of that calendar year (how='end'), e.g. 1982 -> 1983Q1.
infl_q = infl.asfreq(freq='Q-SEP', how='end')
infl_q









    Out[10]:





1983Q1    0.025
1984Q1    0.045
1985Q1    0.037
1986Q1    0.040
Freq: Q-SEP, dtype: float64



In [11]:

    
# Expand the once-a-year inflation values onto gdp's quarterly index; each
# quarter takes the most recent inflation value at or before it.
infl_q.reindex(gdp.index, method='ffill')









    Out[11]:





1984Q2    0.045
1984Q3    0.045
1984Q4    0.045
1985Q1    0.037
1985Q2    0.037
1985Q3    0.037
1985Q4    0.037
Freq: Q-SEP, dtype: float64



In [12]:

    
# Build a minute-frequency index covering one trading session
# (09:30 through 15:59). 'min' is used instead of the deprecated 'T' alias.
rng = pd.date_range('2012-06-01 09:30', '2012-06-01 15:59', freq='min')

# Extend to four sessions in total: the first day plus the same time points
# shifted by 1, 2 and 3 business days (2012-06-01, 06-04, 06-05, 06-06).
rng = rng.append([rng + pd.offsets.BDay(i) for i in range(1, 4)])

# Values are simply the running position 0.0, 1.0, 2.0, ... so that each
# row's value identifies where it sits in the index.
ts = Series(np.arange(len(rng), dtype=float), index=rng)
ts









    Out[12]:





2012-06-01 09:30:00       0.0
2012-06-01 09:31:00       1.0
2012-06-01 09:32:00       2.0
2012-06-01 09:33:00       3.0
2012-06-01 09:34:00       4.0
2012-06-01 09:35:00       5.0
2012-06-01 09:36:00       6.0
2012-06-01 09:37:00       7.0
2012-06-01 09:38:00       8.0
2012-06-01 09:39:00       9.0
2012-06-01 09:40:00      10.0
2012-06-01 09:41:00      11.0
2012-06-01 09:42:00      12.0
2012-06-01 09:43:00      13.0
2012-06-01 09:44:00      14.0
2012-06-01 09:45:00      15.0
2012-06-01 09:46:00      16.0
2012-06-01 09:47:00      17.0
2012-06-01 09:48:00      18.0
2012-06-01 09:49:00      19.0
2012-06-01 09:50:00      20.0
2012-06-01 09:51:00      21.0
2012-06-01 09:52:00      22.0
2012-06-01 09:53:00      23.0
2012-06-01 09:54:00      24.0
2012-06-01 09:55:00      25.0
2012-06-01 09:56:00      26.0
2012-06-01 09:57:00      27.0
2012-06-01 09:58:00      28.0
2012-06-01 09:59:00      29.0
                        ...  
2012-06-06 15:30:00    1530.0
2012-06-06 15:31:00    1531.0
2012-06-06 15:32:00    1532.0
2012-06-06 15:33:00    1533.0
2012-06-06 15:34:00    1534.0
2012-06-06 15:35:00    1535.0
2012-06-06 15:36:00    1536.0
2012-06-06 15:37:00    1537.0
2012-06-06 15:38:00    1538.0
2012-06-06 15:39:00    1539.0
2012-06-06 15:40:00    1540.0
2012-06-06 15:41:00    1541.0
2012-06-06 15:42:00    1542.0
2012-06-06 15:43:00    1543.0
2012-06-06 15:44:00    1544.0
2012-06-06 15:45:00    1545.0
2012-06-06 15:46:00    1546.0
2012-06-06 15:47:00    1547.0
2012-06-06 15:48:00    1548.0
2012-06-06 15:49:00    1549.0
2012-06-06 15:50:00    1550.0
2012-06-06 15:51:00    1551.0
2012-06-06 15:52:00    1552.0
2012-06-06 15:53:00    1553.0
2012-06-06 15:54:00    1554.0
2012-06-06 15:55:00    1555.0
2012-06-06 15:56:00    1556.0
2012-06-06 15:57:00    1557.0
2012-06-06 15:58:00    1558.0
2012-06-06 15:59:00    1559.0
dtype: float64



In [13]:

    
from datetime import time
# Indexing with a datetime.time selects the rows at that clock time on every
# day present in the index.
ts[time(10, 0)]









    Out[13]:





2012-06-01 10:00:00      30.0
2012-06-04 10:00:00     420.0
2012-06-05 10:00:00     810.0
2012-06-06 10:00:00    1200.0
dtype: float64



In [14]:

    
# Explicit method form of selecting by clock time; returns the same rows as
# ts[time(10, 0)].
ts.at_time(time(10, 0))









    Out[14]:





2012-06-01 10:00:00      30.0
2012-06-04 10:00:00     420.0
2012-06-05 10:00:00     810.0
2012-06-06 10:00:00    1200.0
dtype: float64



In [15]:

    
# Select rows whose clock time falls in the window on every day; both the
# 10:00 and the 10:01 endpoints are included in the result.
ts.between_time(time(10, 0), time(10, 1))









    Out[15]:





2012-06-01 10:00:00      30.0
2012-06-01 10:01:00      31.0
2012-06-04 10:00:00     420.0
2012-06-04 10:01:00     421.0
2012-06-05 10:00:00     810.0
2012-06-05 10:01:00     811.0
2012-06-06 10:00:00    1200.0
2012-06-06 10:01:00    1201.0
dtype: float64



In [16]:

    
# Randomly set most of the time series to NA: every position past the first
# 700 entries of a random permutation of the row positions is blanked out.
indexer = np.sort(np.random.permutation(len(ts))[700:])
irr_ts = ts.copy()
# indexer holds integer row positions, not timestamps, so assign through
# .iloc. Plain [] with integers on a DatetimeIndex relies on a deprecated
# positional fallback.
irr_ts.iloc[indexer] = np.nan
irr_ts['2012-06-01 09:50':'2012-06-01 10:00']









    Out[16]:





2012-06-01 09:50:00    20.0
2012-06-01 09:51:00    21.0
2012-06-01 09:52:00    22.0
2012-06-01 09:53:00     NaN
2012-06-01 09:54:00     NaN
2012-06-01 09:55:00    25.0
2012-06-01 09:56:00     NaN
2012-06-01 09:57:00     NaN
2012-06-01 09:58:00     NaN
2012-06-01 09:59:00     NaN
2012-06-01 10:00:00     NaN
dtype: float64



In [17]:

    
# One probe timestamp per business day at 10:00, starting 2012-06-01.
selection = pd.date_range(start='2012-06-01 10:00', periods=4, freq='B')
# For each probe, take the last non-NaN value at or before that timestamp.
irr_ts.asof(where=selection)









    Out[17]:





2012-06-01 10:00:00      25.0
2012-06-04 10:00:00     420.0
2012-06-05 10:00:00     809.0
2012-06-06 10:00:00    1198.0
Freq: B, dtype: float64



In [18]:

    
# Two frames on overlapping daily ranges: data1 is all 1.0 over
# 2012-06-12..06-17, data2 is all 2.0 over 2012-06-13..06-18.
data1 = DataFrame(np.ones((6, 3), dtype=float),
                 columns=['a', 'b', 'c'],
                 index=pd.date_range('6/12/2012', periods=6))
data2 = DataFrame(np.ones((6, 3), dtype=float) * 2,
                  columns=['a', 'b', 'c'],
                  index=pd.date_range('6/13/2012', periods=6))
# Splice: data1 up to and including 2012-06-14, data2 from 2012-06-15 on.
# .ix was removed from pandas; .loc performs the same label-based slicing,
# with both slice endpoints inclusive.
spliced = pd.concat([data1.loc[:'2012-06-14'], data2.loc['2012-06-15':]])
spliced



In [20]:

    
# Naive concatenation keeps every row of both frames, so the dates the two
# ranges share (2012-06-13 through 2012-06-17) appear twice in the result.
pd.concat([data1, data2])



In [22]:

    
# Rebuild data2 with an extra column 'd' that data1 does not have.
data2 = DataFrame(np.ones((6, 4), dtype=float) * 2,
                 columns=['a', 'b', 'c', 'd'],
                 index=pd.date_range('6/13/2012', periods=6))
# .ix was removed from pandas; .loc performs the same label-based slicing.
# Rows taken from data1 carry no 'd' value, so concat leaves them NaN in 'd'.
spliced = pd.concat([data1.loc[:'2012-06-14'], data2.loc['2012-06-15':]])
spliced



In [23]:

    
# Fill the holes in spliced with data2's values at the same date and column.
# combine_first returns a new frame and leaves spliced itself unchanged.
# data2 starts on 2012-06-13, so it has nothing to offer for 2012-06-12.
spliced_filled = spliced.combine_first(data2)
spliced_filled



In [24]:

    
# In-place variant: update() mutates spliced directly and returns nothing.
# overwrite=False restricts the update to entries that are currently NaN,
# so existing values in spliced are kept.
spliced.update(data2, overwrite=False)
spliced



In [25]:

    
# Work on a copy so that spliced itself is left untouched.
cp_spliced = spliced.copy()
# Replace columns 'a' and 'c' with data1's values, matched up by date.
# NOTE(review): data1 ends on 2012-06-17, so later dates in cp_spliced
# presumably end up NaN in these columns -- confirm against the output.
cp_spliced[['a', 'c']] = data1[['a', 'c']]
cp_spliced



In [ ]:



In [ ]:

	a	b	c
2012-06-12	1.0	1.0	1.0
2012-06-13	1.0	1.0	1.0
2012-06-14	1.0	1.0	1.0
2012-06-15	2.0	2.0	2.0
2012-06-16	2.0	2.0	2.0
2012-06-17	2.0	2.0	2.0
2012-06-18	2.0	2.0	2.0