In [ ]:
# What is special about time series? They are everywhere.
In [ ]:
## Pandas basics
# Uses DataFrames to put data in an easy-to-use format
# SQL-like (relational)
# Fast reads straight from storage
In [5]:
import pandas as pd
import numpy as np
In [17]:
# Build a DataFrame from a dict: each key becomes a column, and the explicit
# `index` supplies one row label per student.
students = pd.DataFrame(
    data={
        'phone': ['123-1234', '321-4321', '321-4321'],
        'age': [13, 12, 13],
    },
    index=['alice', 'bob', 'eve'],
)
students
Out[17]:
In [19]:
# Inspect the row labels of `students` (the 'alice', 'bob', 'eve' index given at construction).
students.index
Out[19]:
In [22]:
# DataFrame from a NumPy array: 6 rows x 4 columns of standard-normal samples.
# A locally seeded Generator keeps the values (and every table/plot derived
# from them) identical on each Restart & Run All, without touching NumPy's
# global random state.
df = pd.DataFrame(
    np.random.default_rng(42).standard_normal((6, 4)),
    index=['index1', 'index2', 'index3', 'index4', 'index5', 'index6'],
    columns=list('ABCD'),
)
df
Out[22]:
In [24]:
# A Series is a one-dimensional set of values with an index. No index is given
# here, so pandas assigns the default integer labels; np.nan marks a missing value.
s = pd.Series(data=[1, 3, 4, np.nan, 4, 2])
s
Out[24]:
In [26]:
# The index can be replaced after construction: relabel the six values 'a'..'f'.
s.index = list('abcdef')
s
Out[26]:
In [30]:
# Missing data: boolean mask that is True wherever a value is NaN
# (isna and isnull are the same method under two names).
s.isna()
Out[30]:
In [29]:
# plotting
import matplotlib.pyplot as plt
%matplotlib inline
In [32]:
# Plot the Series with pandas' default plot kind (a line): values on the y-axis, index labels on the x-axis.
s.plot()
Out[32]:
In [33]:
# Plot the DataFrame: one line per column (A-D), drawn against the row index.
df.plot()
Out[33]:
In [34]:
# View part of a Series with a boolean mask built from its labels: keep the
# entries whose label sorts after 'c'. This relies on the string labels
# assigned to s.index earlier in the notebook.
s.loc[s.index > 'c']
Out[34]:
In [36]:
# Keep only the non-missing values. notna() produces the "is present" mask
# directly, instead of comparing a boolean mask against False.
s[s.notna()]
Out[36]:
In [37]:
# head() shows the first five rows by default; df has six rows, so only the last one is left out.
df.head()
Out[37]:
In [41]:
# Select a column with attribute-style access; returns column 'A' as a Series.
# This shorthand only works when the column name is a valid Python identifier
# that does not clash with an existing DataFrame attribute or method.
df.A
Out[41]:
In [40]:
# Select a column with bracket (key) access; returns column 'B' as a Series.
df['B']
Out[40]:
In [42]:
# Select a single row by its index label with .loc; the result is a Series
# keyed by the column names.
df.loc['index2']
Out[42]:
In [43]:
# Row label and column intersection: row 'index3' restricted to columns 'A' and 'C'.
df.loc['index3', ['A', 'C']]
Out[43]:
In [ ]:
#