In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from pandas import DataFrame, Series

In [7]:
from datetime import datetime

# Six irregularly spaced daily timestamps in January 2011 (note the gaps
# between consecutive observations).
dates = [datetime(2011, 1, day) for day in (2, 5, 7, 8, 10, 12)]

# Sample the values from a dedicated, seeded generator so the series is the
# same on every "Restart Kernel -> Run All". Using a private RandomState
# (rather than np.random.seed) leaves NumPy's global random state untouched.
ts = Series(np.random.RandomState(2011).randn(6), index=dates)
ts


Out[7]:
2011-01-02   -0.217490
2011-01-05    0.232629
2011-01-07   -1.306862
2011-01-08   -0.037602
2011-01-10    1.035283
2011-01-12   -1.695776
dtype: float64

In [8]:
# Confirm the container type: `ts` is a pandas Series.
type(ts)


Out[8]:
pandas.core.series.Series

In [9]:
# A Series indexed by datetime objects gets a DatetimeIndex; no frequency is
# attached because the dates were supplied explicitly.
ts.index


Out[9]:
DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-07', '2011-01-08',
               '2011-01-10', '2011-01-12'],
              dtype='datetime64[ns]', freq=None)

In [10]:
# Arithmetic between time series aligns on the timestamps: every second
# observation is added to the full series, and dates missing from the
# shorter operand come out as NaN.
ts.add(ts.iloc[::2])


Out[10]:
2011-01-02   -0.434980
2011-01-05         NaN
2011-01-07   -2.613724
2011-01-08         NaN
2011-01-10    2.070567
2011-01-12         NaN
dtype: float64

In [11]:
# The index stores its timestamps as NumPy datetime64 values at nanosecond
# resolution.
ts.index.dtype


Out[11]:
dtype('<M8[ns]')

In [13]:
# A single element taken from a DatetimeIndex is a pandas Timestamp.
stamp = ts.index[0]
stamp


Out[13]:
Timestamp('2011-01-02 00:00:00')

In [14]:
# Use the third timestamp of the index as a label and fetch its value.
stamp = ts.index[2]
ts.loc[stamp]


Out[14]:
-1.3068620596944851

In [15]:
# A string that can be read as a date also works as a label.
ts.loc['1/10/2011']


Out[15]:
1.0352833533993351

In [16]:
# A compact YYYYMMDD string is interpreted as a date too, so this selects the
# 2011-01-10 observation.
ts['20110110']


Out[16]:
1.0352833533993351

In [17]:
# 1000 consecutive daily observations starting on 2000-01-01. The values come
# from a dedicated, seeded generator so the series is reproducible across
# kernel restarts without touching NumPy's global random state.
longer_ts = Series(np.random.RandomState(2000).randn(1000),
                   index=pd.date_range('1/1/2000', periods=1000))
longer_ts


Out[17]:
2000-01-01   -0.038460
2000-01-02    0.453099
2000-01-03   -0.651186
2000-01-04   -0.663280
2000-01-05   -0.603742
2000-01-06    0.353221
2000-01-07   -2.176615
2000-01-08    0.214963
2000-01-09   -0.523673
2000-01-10   -1.041635
2000-01-11    0.682076
2000-01-12   -0.482131
2000-01-13   -0.329607
2000-01-14   -0.176688
2000-01-15    1.263878
2000-01-16    0.136266
2000-01-17    1.130064
2000-01-18    0.782839
2000-01-19    0.209327
2000-01-20    1.373854
2000-01-21    1.244269
2000-01-22   -0.695844
2000-01-23    0.159428
2000-01-24    1.404400
2000-01-25   -1.209737
2000-01-26    0.668682
2000-01-27   -0.131594
2000-01-28   -2.835395
2000-01-29    0.004286
2000-01-30    0.509177
                ...   
2002-08-28   -0.374837
2002-08-29   -0.333647
2002-08-30   -0.633700
2002-08-31    0.609845
2002-09-01    2.124618
2002-09-02    1.139003
2002-09-03    0.140639
2002-09-04    1.229015
2002-09-05    1.840096
2002-09-06    0.774543
2002-09-07    0.672464
2002-09-08   -1.208706
2002-09-09   -1.159701
2002-09-10    0.268345
2002-09-11    0.199017
2002-09-12   -0.133274
2002-09-13    0.688592
2002-09-14    1.874525
2002-09-15    2.030613
2002-09-16    0.173638
2002-09-17    0.101632
2002-09-18    0.462317
2002-09-19   -0.007706
2002-09-20   -0.695445
2002-09-21   -0.598202
2002-09-22    0.550767
2002-09-23    1.442328
2002-09-24   -0.151823
2002-09-25    0.194411
2002-09-26   -0.210302
Freq: D, dtype: float64

In [18]:
# Passing only a year selects every observation that falls in that year.
longer_ts.loc['2001']


Out[18]:
2001-01-01   -0.826275
2001-01-02    1.387808
2001-01-03   -0.911606
2001-01-04   -0.546555
2001-01-05   -1.667812
2001-01-06   -1.131112
2001-01-07    1.252056
2001-01-08    0.319335
2001-01-09    0.740146
2001-01-10   -0.384294
2001-01-11    0.202212
2001-01-12    1.259484
2001-01-13   -2.233536
2001-01-14    0.474881
2001-01-15   -1.498646
2001-01-16   -1.507541
2001-01-17   -1.395348
2001-01-18   -0.696344
2001-01-19   -0.125430
2001-01-20   -1.064943
2001-01-21   -0.620401
2001-01-22   -0.139936
2001-01-23   -1.097298
2001-01-24    0.097586
2001-01-25    0.158815
2001-01-26    0.059007
2001-01-27    0.329880
2001-01-28    0.071079
2001-01-29   -0.186354
2001-01-30    0.052783
                ...   
2001-12-02   -3.265659
2001-12-03    0.986492
2001-12-04   -2.898704
2001-12-05   -1.609303
2001-12-06   -0.348018
2001-12-07   -0.860276
2001-12-08    0.145911
2001-12-09    0.311387
2001-12-10    0.426004
2001-12-11   -1.211686
2001-12-12    0.957913
2001-12-13    0.332860
2001-12-14   -1.446436
2001-12-15   -0.594030
2001-12-16   -0.192876
2001-12-17    0.268391
2001-12-18   -0.518900
2001-12-19   -0.804856
2001-12-20   -0.140815
2001-12-21    0.133363
2001-12-22   -0.598520
2001-12-23    0.540674
2001-12-24   -0.676043
2001-12-25    0.253910
2001-12-26   -0.437802
2001-12-27   -0.150000
2001-12-28   -0.039667
2001-12-29    0.738345
2001-12-30   -0.905963
2001-12-31    0.052548
Freq: D, dtype: float64

In [19]:
# A year-month string narrows the selection to that single month.
longer_ts.loc['2001-05']


Out[19]:
2001-05-01   -0.943727
2001-05-02    0.051258
2001-05-03   -0.474104
2001-05-04    0.836441
2001-05-05    0.714126
2001-05-06   -0.966798
2001-05-07    1.573168
2001-05-08   -1.580141
2001-05-09    1.622680
2001-05-10   -0.315510
2001-05-11    0.330638
2001-05-12    1.210118
2001-05-13    0.015662
2001-05-14    0.483605
2001-05-15    2.558906
2001-05-16    2.043627
2001-05-17    1.232723
2001-05-18    0.123412
2001-05-19    0.519859
2001-05-20   -2.947284
2001-05-21    0.239201
2001-05-22   -2.051902
2001-05-23    0.092232
2001-05-24   -1.210047
2001-05-25   -0.597089
2001-05-26   -0.439398
2001-05-27   -2.276553
2001-05-28    1.270015
2001-05-29   -0.706633
2001-05-30    1.293651
2001-05-31    0.140200
Freq: D, dtype: float64

In [20]:
# Slice by label with a datetime object: keep everything from 2011-01-07 on.
ts.loc[datetime(2011, 1, 7):]


Out[20]:
2011-01-07   -1.306862
2011-01-08   -0.037602
2011-01-10    1.035283
2011-01-12   -1.695776
dtype: float64

In [21]:
# Redisplay the full series for reference.
ts


Out[21]:
2011-01-02   -0.217490
2011-01-05    0.232629
2011-01-07   -1.306862
2011-01-08   -0.037602
2011-01-10    1.035283
2011-01-12   -1.695776
dtype: float64

In [22]:
# Range query with date strings; neither bound has to be present in the
# index for the slice to work.
ts.loc['1/6/2011':'1/11/2011']


Out[22]:
2011-01-07   -1.306862
2011-01-08   -0.037602
2011-01-10    1.035283
dtype: float64

In [23]:
# truncate() drops every observation after the given date; the cut-off date
# itself does not have to be in the index.
ts.truncate(after='1/9/2011')


Out[23]:
2011-01-02   -0.217490
2011-01-05    0.232629
2011-01-07   -1.306862
2011-01-08   -0.037602
dtype: float64

In [24]:
# 100 weekly timestamps anchored on Wednesdays, starting from 1 January 2000.
dates = pd.date_range('1/1/2000', periods=100, freq='W-WED')

# One random column per state. A dedicated, seeded generator keeps the frame
# reproducible across kernel restarts without touching NumPy's global state.
long_df = DataFrame(np.random.RandomState(100).randn(100, 4),
                    index=dates,
                    columns=['Colorado', 'Texas', 'New York', 'Ohio'])

# Select the rows for May 2001 by partial date string. `.ix` was deprecated
# and then removed from pandas, so label-based `.loc` is used instead.
long_df.loc['5-2001']


Out[24]:
Colorado Texas New York Ohio
2001-05-02 -0.756285 -0.002420 1.179144 -0.871021
2001-05-09 -0.120137 -0.893153 -0.001954 1.634898
2001-05-16 -1.046715 0.790516 1.469831 -0.178039
2001-05-23 0.071643 -1.757984 -1.191287 -0.735796
2001-05-30 -0.595489 0.955372 0.673426 1.737943

In [25]:
# Build a series in which 2 January 2000 appears three times, to show how
# pandas handles repeated timestamps.
dates = pd.DatetimeIndex(['1/1/2000'] + ['1/2/2000'] * 3 + ['1/3/2000'])
dup_ts = pd.Series(np.arange(5), index=dates)
dup_ts


Out[25]:
2000-01-01    0
2000-01-02    1
2000-01-02    2
2000-01-02    3
2000-01-03    4
dtype: int32

In [26]:
# is_unique reports whether every label in the index occurs only once.
dup_ts.index.is_unique


Out[26]:
False

In [27]:
dup_ts['1/3/2000'] # not duplicated: a single scalar comes back


Out[27]:
4

In [28]:
dup_ts['1/2/2000'] # duplicated: every matching row comes back as a Series


Out[28]:
2000-01-02    1
2000-01-02    2
2000-01-02    3
dtype: int32

In [29]:
# Aggregate observations that share a timestamp: level=0 groups on the index
# itself, so each distinct date becomes one group.
grouped = dup_ts.groupby(level=0)
grouped.mean()


Out[29]:
2000-01-01    0
2000-01-02    2
2000-01-03    4
dtype: int32

In [30]:
# Number of observations recorded under each distinct timestamp.
grouped.count()


Out[30]:
2000-01-01    1
2000-01-02    3
2000-01-03    1
dtype: int64

In [31]:
# Show the irregularly spaced series once more for comparison.
ts


Out[31]:
2011-01-02   -0.217490
2011-01-05    0.232629
2011-01-07   -1.306862
2011-01-08   -0.037602
2011-01-10    1.035283
2011-01-12   -1.695776
dtype: float64

In [33]:
# Convert to a fixed daily frequency; days without an observation show up
# as NaN.
ts.resample('D').mean()


Out[33]:
2011-01-02   -0.217490
2011-01-03         NaN
2011-01-04         NaN
2011-01-05    0.232629
2011-01-06         NaN
2011-01-07   -1.306862
2011-01-08   -0.037602
2011-01-09         NaN
2011-01-10    1.035283
2011-01-11         NaN
2011-01-12   -1.695776
Freq: D, dtype: float64

In [34]:
# Daily timestamps from 1 April 2012 through 1 June 2012, both ends included.
index = pd.date_range(start='4/1/2012', end='6/1/2012')
index


Out[34]:
DatetimeIndex(['2012-04-01', '2012-04-02', '2012-04-03', '2012-04-04',
               '2012-04-05', '2012-04-06', '2012-04-07', '2012-04-08',
               '2012-04-09', '2012-04-10', '2012-04-11', '2012-04-12',
               '2012-04-13', '2012-04-14', '2012-04-15', '2012-04-16',
               '2012-04-17', '2012-04-18', '2012-04-19', '2012-04-20',
               '2012-04-21', '2012-04-22', '2012-04-23', '2012-04-24',
               '2012-04-25', '2012-04-26', '2012-04-27', '2012-04-28',
               '2012-04-29', '2012-04-30', '2012-05-01', '2012-05-02',
               '2012-05-03', '2012-05-04', '2012-05-05', '2012-05-06',
               '2012-05-07', '2012-05-08', '2012-05-09', '2012-05-10',
               '2012-05-11', '2012-05-12', '2012-05-13', '2012-05-14',
               '2012-05-15', '2012-05-16', '2012-05-17', '2012-05-18',
               '2012-05-19', '2012-05-20', '2012-05-21', '2012-05-22',
               '2012-05-23', '2012-05-24', '2012-05-25', '2012-05-26',
               '2012-05-27', '2012-05-28', '2012-05-29', '2012-05-30',
               '2012-05-31', '2012-06-01'],
              dtype='datetime64[ns]', freq='D')

In [35]:
# With only a start date, `periods` sets how many daily timestamps to
# generate.
pd.date_range(start='4/1/2012', periods=20)


Out[35]:
DatetimeIndex(['2012-04-01', '2012-04-02', '2012-04-03', '2012-04-04',
               '2012-04-05', '2012-04-06', '2012-04-07', '2012-04-08',
               '2012-04-09', '2012-04-10', '2012-04-11', '2012-04-12',
               '2012-04-13', '2012-04-14', '2012-04-15', '2012-04-16',
               '2012-04-17', '2012-04-18', '2012-04-19', '2012-04-20'],
              dtype='datetime64[ns]', freq='D')

In [36]:
# With only an end date, the 20 daily timestamps are laid out so that the
# last one falls on that date.
pd.date_range(end='6/1/2012', periods=20)


Out[36]:
DatetimeIndex(['2012-05-13', '2012-05-14', '2012-05-15', '2012-05-16',
               '2012-05-17', '2012-05-18', '2012-05-19', '2012-05-20',
               '2012-05-21', '2012-05-22', '2012-05-23', '2012-05-24',
               '2012-05-25', '2012-05-26', '2012-05-27', '2012-05-28',
               '2012-05-29', '2012-05-30', '2012-05-31', '2012-06-01'],
              dtype='datetime64[ns]', freq='D')

In [37]:
# Last business day of each month between the two bounds. The offset object
# is passed instead of the 'BM' string alias: recent pandas releases
# deprecate that alias in favour of 'BME', whereas BMonthEnd() is accepted by
# old and new versions alike.
pd.date_range('1/1/2000', '12/1/2000', freq=pd.offsets.BMonthEnd())


Out[37]:
DatetimeIndex(['2000-01-31', '2000-02-29', '2000-03-31', '2000-04-28',
               '2000-05-31', '2000-06-30', '2000-07-31', '2000-08-31',
               '2000-09-29', '2000-10-31', '2000-11-30'],
              dtype='datetime64[ns]', freq='BM')

In [38]:
# The time-of-day of the start timestamp is carried over to every generated
# date.
pd.date_range('5/2/2012 12:56:31', periods=5)


Out[38]:
DatetimeIndex(['2012-05-02 12:56:31', '2012-05-03 12:56:31',
               '2012-05-04 12:56:31', '2012-05-05 12:56:31',
               '2012-05-06 12:56:31'],
              dtype='datetime64[ns]', freq='D')

In [44]:
# Weekly frequency anchored on Mondays: the range begins on the first Monday
# after the start timestamp (2016-07-25) and keeps its time-of-day, including
# the microseconds.
pd.date_range('7/21/2016 10:11:12.345678', periods=10, freq='W-MON')


Out[44]:
DatetimeIndex(['2016-07-25 10:11:12.345678', '2016-08-01 10:11:12.345678',
               '2016-08-08 10:11:12.345678', '2016-08-15 10:11:12.345678',
               '2016-08-22 10:11:12.345678', '2016-08-29 10:11:12.345678',
               '2016-09-05 10:11:12.345678', '2016-09-12 10:11:12.345678',
               '2016-09-19 10:11:12.345678', '2016-09-26 10:11:12.345678'],
              dtype='datetime64[ns]', freq='W-MON')

In [45]:
# normalize=True resets the generated timestamps to midnight, discarding the
# time-of-day given in the start string.
pd.date_range('5/2/2012 12:56:31', periods=5, normalize=True)


Out[45]:
DatetimeIndex(['2012-05-02', '2012-05-03', '2012-05-04', '2012-05-05',
               '2012-05-06'],
              dtype='datetime64[ns]', freq='D')

In [ ]: