In [26]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
from datetime import datetime
In [3]:
# Calendar parts of the example timestamp: 14 October 2017.
my_year, my_month, my_day = 2017, 10, 14
# Time-of-day parts of the example timestamp: 15:30:15.
my_hour, my_minute, my_second = 15, 30, 15
In [4]:
# Date-only construction: the time-of-day fields default to midnight.
my_date = datetime(year=my_year, month=my_month, day=my_day)
In [5]:
my_date
Out[5]:
In [6]:
# Full timestamp: the same date as above plus hour, minute and second.
my_date_time = datetime(
    year=my_year,
    month=my_month,
    day=my_day,
    hour=my_hour,
    minute=my_minute,
    second=my_second,
)
In [7]:
my_date_time
Out[7]:
In [8]:
type(my_date_time)
Out[8]:
In [9]:
my_date_time.day
Out[9]:
In [10]:
my_date_time.month
Out[10]:
In [11]:
# The first two days of January 2016 as plain datetime objects (midnight).
first_two = [datetime(2016, 1, day_of_month) for day_of_month in (1, 2)]
In [12]:
first_two
Out[12]:
In [13]:
type(first_two)
Out[13]:
In [14]:
type(first_two[0])
Out[14]:
In [15]:
# Convert the list of datetime objects into a pandas DatetimeIndex so it can
# be used as a DataFrame index.
dt_ind = pd.DatetimeIndex(first_two)
In [17]:
dt_ind
Out[17]:
In [18]:
# 2x2 block of standard-normal demo values. A dedicated, fixed-seed
# RandomState keeps the displayed frame identical on every
# "Restart Kernel -> Run All" without touching NumPy's global random state.
data = np.random.RandomState(101).randn(2, 2)
In [19]:
cols = ["a", "b"]
In [20]:
# Demo frame: random values, indexed by the two dates, with columns "a" and "b".
df = pd.DataFrame(data=data, index=dt_ind, columns=cols)
In [21]:
df
Out[21]:
In [22]:
# Integer position (not the timestamp itself) of the latest date in the index.
df.index.argmax()
Out[22]:
In [23]:
# Integer position (not the timestamp itself) of the earliest date in the index.
df.index.argmin()
Out[23]:
In [24]:
df.index.min()
Out[24]:
In [25]:
type(df.index.min())
Out[25]:
In [27]:
# Load the raw CSV. At this point "Date" is an ordinary column, not the index.
df = pd.read_csv("time_data/walmart_stock.csv")
In [28]:
df.head()
Out[28]:
In [30]:
df.info()
In [33]:
# Convert the "Date" column (presumably strings as read from the CSV) to
# datetime values. Be aware of formatting: ambiguous day/month strings may need
# an explicit format= argument to be parsed as intended.
df["Date"] = pd.to_datetime(df["Date"])
In [34]:
df.info()
In [35]:
# Promote the parsed "Date" column to the index (rebinding df to the result).
df = df.set_index("Date")
In [37]:
df.head()
Out[37]:
In [41]:
# One-step alternative to the cells above: use "Date" as the index and parse it
# as dates while reading the file.
df2 = pd.read_csv("time_data/walmart_stock.csv", index_col="Date", parse_dates=True)
In [42]:
df2.head()
Out[42]:
In [47]:
df2.index
Out[47]:
In [46]:
type(df2.index[0])
Out[46]:
In [49]:
# resample() on its own only groups the rows by the rule ("A" = year end
# frequency, see the alias table below); an aggregation such as .mean() is
# still needed to produce values.
df.resample(rule="A")
Out[49]:
Alias | Description |
---|---|
B | business day frequency |
C | custom business day frequency (experimental) |
D | calendar day frequency |
W | weekly frequency |
M | month end frequency |
SM | semi-month end frequency (15th and end of month) |
BM | business month end frequency |
CBM | custom business month end frequency |
MS | month start frequency |
SMS | semi-month start frequency (1st and 15th) |
BMS | business month start frequency |
CBMS | custom business month start frequency |
Q | quarter end frequency |
BQ | business quarter end frequency |
QS | quarter start frequency |
BQS | business quarter start frequency |
A | year end frequency |
BA | business year end frequency |
AS | year start frequency |
BAS | business year start frequency |
BH | business hour frequency |
H | hourly frequency |
T, min | minutely frequency |
S | secondly frequency |
L, ms | milliseconds |
U, us | microseconds |
N | nanoseconds |
In [51]:
# Mean of every column within each year-end ("A") bin.
df.resample(rule="A").mean()
Out[51]:
In [52]:
# Mean of every column within each business-quarter-end ("BQ") bin.
df.resample(rule="BQ").mean()
Out[52]:
In [53]:
# Maximum of every column within each year-end ("A") bin.
df.resample(rule="A").max()
Out[53]:
In [54]:
def first_day(entry):
    """Return the first value of one resampling bin.

    Intended as a custom aggregation for ``resample(...).apply(...)``.

    Parameters
    ----------
    entry : pandas.Series (or any object exposing ``len()`` and ``.iloc``)
        The values that fall into a single bin, in index order.

    Returns
    -------
    scalar
        The first element by position, or NaN when the bin is empty.
    """
    # An empty bin has no first element; report it as missing instead of
    # raising from inside the aggregation.
    if len(entry) == 0:
        return float("nan")
    # Use .iloc for positional access: ``entry[0]`` is a label lookup, which is
    # deprecated as a positional fallback on non-integer indexes and raises
    # KeyError on integer indexes that have no label 0.
    return entry.iloc[0]
In [55]:
# Use the custom first_day function as the aggregation for each year-end bin.
df.resample("A").apply(first_day)
Out[55]:
In [56]:
# Yearly mean of the Close column, drawn as a bar chart.
close_yearly_mean = df["Close"].resample("A").mean()
close_yearly_mean.plot(kind="bar")
Out[56]:
In [59]:
# Monthly mean of the Close column, drawn as a wide bar chart.
close_monthly_mean = df["Close"].resample("M").mean()
close_monthly_mean.plot(kind="bar", figsize=(16, 6))
Out[59]:
In [1]:
import pandas as pd
In [2]:
import matplotlib.pyplot as plt
%matplotlib inline
In [3]:
# Load the stock data with the "Date" column parsed as dates and used as the index.
df = pd.read_csv("time_data/walmart_stock.csv", index_col="Date", parse_dates=True)
In [4]:
df.head()
Out[4]:
In [5]:
df.tail()
Out[5]:
In [15]:
# Move the values down by one row while the index stays put; the first row has
# nothing to take its value from, so it becomes NaN.
df.shift(periods=1).head()
Out[15]:
In [14]:
# Move the values up by one row while the index stays put; the last row has
# nothing to take its value from, so it becomes NaN.
df.shift(periods=-1).tail()
Out[14]:
In [13]:
df.head()
Out[13]:
In [16]:
# DataFrame.tshift was deprecated in pandas 1.1 and removed in 2.0.
# shift(freq=...) performs the same index shift and works on old and new
# versions alike: the data is untouched and each date moves to its month end.
df.shift(freq="M").head()
Out[16]:
In [17]:
# With freq= the index is shifted rather than the data: each date moves to its
# year end and no NaN rows are introduced.
df.shift(freq="A").head()
Out[17]:
In [18]:
df.head()
Out[18]:
In [19]:
df["Open"].plot(figsize=(16, 6))
Out[19]:
In [21]:
# Mean over a sliding window of 7 rows; the first 6 rows are NaN because their
# window is incomplete. This returns a new frame -- df itself is not modified.
df.rolling(window=7).mean().head(20)
Out[21]:
In [25]:
# Overlay the raw Open price with the 7-, 14- and 28-row rolling means of
# Close on one shared 16x6 axes.
ax = df["Open"].plot(figsize=(16, 6))
for window in (7, 14, 28):
    df["Close"].rolling(window=window).mean().plot(ax=ax)
ax
Out[25]:
In [26]:
# Store the 30-row rolling mean of Close as a new column on df (the first 29
# rows are NaN because their window is incomplete).
df["Close 30 Day MA"] = df["Close"].rolling(window=30).mean()
# Plot the moving average against the raw close on one wide figure.
df[["Close 30 Day MA", "Close"]].plot(figsize=(16, 6))
Out[26]:
In [29]:
# Expanding mean: at each row, the mean of every Close value from the first
# row up to and including that row.
df["Close"].expanding().mean().plot(figsize=(16, 6))
Out[29]:
Determining whether the price is relatively high or low (Bollinger Bands: 20-day mean ± 2 rolling standard deviations)
In [30]:
# Rolling view over 20 rows of Close, shared by the mean and the band width.
close_roll_20 = df["Close"].rolling(20)
# Half-width of the band: two rolling standard deviations.
band_halfwidth = 2 * close_roll_20.std()

# Centre line, then the upper and lower bands around it.
df["Close: 20 Day Mean"] = close_roll_20.mean()
df["Upper"] = df["Close: 20 Day Mean"] + band_halfwidth
df["Lower"] = df["Close: 20 Day Mean"] - band_halfwidth

# Plot the close together with its rolling mean and both bands.
df[["Close", "Close: 20 Day Mean", "Upper", "Lower"]].plot(figsize=(16, 6))
Out[30]: