In [ ]:
# What is special about time series? They are everywhere.

In [ ]:
## Pandas basics
  # uses a DataFrame to hold data in an easy-to-use tabular format
  # SQL-like (relational) operations
  # fast reads straight from storage

In [5]:
import pandas as pd
import numpy as np

In [17]:
# Build a DataFrame from a dict: every key becomes a column, and the `index`
# list supplies one row label per entry.
students = pd.DataFrame(
    data=dict(
        phone=['123-1234', '321-4321', '321-4321'],
        age=[13, 12, 13],
    ),
    index=['alice', 'bob', 'eve'],
)
students


Out[17]:
age phone
alice 13 123-1234
bob 12 321-4321
eve 13 321-4321

In [19]:
# Row labels of `students` -- the `index` list passed when the frame was built.
students.index


Out[19]:
Index(['alice', 'bob', 'eve'], dtype='object')

In [22]:
# DataFrame from a 2-D NumPy array: 6 rows x 4 columns of standard-normal
# draws, with explicit row labels (index) and column labels.
# The numbers come from a private, seeded RandomState so the table is the
# same on every Restart & Run All and NumPy's global random state is untouched.
SEED = 0
row_labels = ['index{}'.format(i) for i in range(1, 7)]  # 'index1' .. 'index6'
df = pd.DataFrame(np.random.RandomState(SEED).randn(6, 4),
                  index=row_labels,
                  columns=list('ABCD'))
df


Out[22]:
A B C D
index1 1.041647 -0.281104 -2.357189 -0.497115
index2 -0.613589 0.232681 -0.130158 -1.382819
index3 -0.523167 0.249878 0.422815 0.628948
index4 0.711498 -0.111896 -0.359935 0.050962
index5 2.005816 -0.111702 0.495628 -0.243297
index6 1.127781 0.606418 -0.658909 -0.280735

In [24]:
# Series: a one-dimensional array of values, each paired with an index label.
# No index is passed here, so the entries are labelled 0..5; np.nan marks a
# missing value.
s = pd.Series(data=[1, 3, 4, np.nan, 4, 2])
s


Out[24]:
0    1.0
1    3.0
2    4.0
3    NaN
4    4.0
5    2.0
dtype: float64

In [26]:
# The index can also be assigned after construction: one label per element,
# here the letters 'a' through 'f'.
s.index = list('abcdef')
s


Out[26]:
a    1.0
b    3.0
c    4.0
d    NaN
e    4.0
f    2.0
dtype: float64

In [30]:
# Missing data: isnull() gives a boolean Series that is True where the value
# is NaN (only label 'd' here) and False everywhere else.
s.isnull()


Out[30]:
a    False
b    False
c    False
d     True
e    False
f    False
dtype: bool

In [29]:
# Plotting setup: import pyplot and switch on the inline backend so figures
# are rendered directly below the cell that draws them.
import matplotlib.pyplot as plt
%matplotlib inline

In [32]:
# Plot the Series with pandas' default plot kind; the matplotlib Axes it
# returns is what appears as this cell's Out[] value.
s.plot()


Out[32]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fa8d44c4a90>

In [33]:
# Plot the whole DataFrame (columns A-D) on a single Axes; the Axes object is
# what appears as this cell's Out[] value.
df.plot()


Out[33]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fa8d447f400>

In [34]:
# Select part of a Series (or DataFrame) with a boolean mask: here the mask
# is built from the index and keeps entries whose label compares greater
# than 'c'.
after_c = s.index > 'c'
s[after_c]


Out[34]:
d    NaN
e    4.0
f    2.0
dtype: float64

In [36]:
# Keep only the entries that have a value: notnull() is True exactly where a
# value is present, so it can be used as the boolean mask directly.
s[s.notnull()]


Out[36]:
a    1.0
b    3.0
c    4.0
e    4.0
f    2.0
dtype: float64

In [37]:
# Peek at the top of the frame: head() shows the first 5 of the 6 rows here.
df.head()


Out[37]:
A B C D
index1 1.041647 -0.281104 -2.357189 -0.497115
index2 -0.613589 0.232681 -0.130158 -1.382819
index3 -0.523167 0.249878 0.422815 0.628948
index4 0.711498 -0.111896 -0.359935 0.050962
index5 2.005816 -0.111702 0.495628 -0.243297

In [41]:
# Select a single column: attribute-style access returns column 'A' as a
# Series that keeps the frame's row labels.
df.A


Out[41]:
index1    1.041647
index2   -0.613589
index3   -0.523167
index4    0.711498
index5    2.005816
index6    1.127781
Name: A, dtype: float64

In [40]:
# Column selection with bracket syntax: column 'B' as a Series.
df['B']


Out[40]:
index1   -0.281104
index2    0.232681
index3    0.249878
index4   -0.111896
index5   -0.111702
index6    0.606418
Name: B, dtype: float64

In [42]:
# Select a row by its index label with .loc; the row comes back as a Series
# whose index is the column names (A-D).
df.loc['index2']


Out[42]:
A   -0.613589
B    0.232681
C   -0.130158
D   -1.382819
Name: index2, dtype: float64

In [43]:
# Row label plus a list of columns in one .loc call: the values of columns
# 'A' and 'C' for row 'index3', returned as a Series.
df.loc['index3', ['A', 'C']]


Out[43]:
A   -0.523167
C    0.422815
Name: index3, dtype: float64

In [ ]:
#