notebook.community

Edit and run



In [ ]:

    
# What is special about time series? They are everywhere.



In [ ]:

    
## Pandas basics
  # uses DataFrames to put data in an easy-to-use format
  # SQL-like (relational)
  # fast reads straight from storage



In [5]:

    
import pandas as pd
import numpy as np



In [17]:

    
# Build a DataFrame from a dict: every key becomes a column, and the
# `index` list supplies the row labels.
students = pd.DataFrame(
    data=dict(
        phone=['123-1234', '321-4321', '321-4321'],
        age=[13, 12, 13],
    ),
    index=['alice', 'bob', 'eve'],
)
students



In [19]:

    
# Row labels of the frame (the `index` list passed at construction)
students.index









    Out[19]:





Index(['alice', 'bob', 'eve'], dtype='object')



In [22]:

    
# DataFrame from a NumPy array: 6 rows x 4 columns of standard-normal samples.
# The samples come from a locally seeded generator so the frame is identical
# on every Restart & Run All; the global NumPy random state is left untouched.
# NOTE: outputs saved further down came from an unseeded run and will show
# different numbers until the notebook is re-executed.
df = pd.DataFrame(np.random.RandomState(0).randn(6, 4),
                  index=['index1', 'index2', 'index3', 'index4', 'index5', 'index6'],
                  columns=list('ABCD'))
df



In [24]:

    
# A Series is a one-dimensional set of values with an index. No index is
# given here, so the labels are the default positions 0..5; the NaN entry
# makes the whole Series float64.
s = pd.Series(
    data=[1, 3, 4, np.nan, 4, 2],
)
s









    Out[24]:





0    1.0
1    3.0
2    4.0
3    NaN
4    4.0
5    2.0
dtype: float64



In [26]:

    
# Labels can be attached after construction by assigning to `.index`;
# the six letters replace the existing labels one-for-one.
s.index = list('abcdef')
s









    Out[26]:





a    1.0
b    3.0
c    4.0
d    NaN
e    4.0
f    2.0
dtype: float64



In [30]:

    
# Missing data: boolean mask that is True wherever the value is NaN
pd.isnull(s)









    Out[30]:





a    False
b    False
c    False
d     True
e    False
f    False
dtype: bool



In [29]:

    
# plotting
# `%matplotlib inline` is an IPython magic (not Python syntax); it asks the
# notebook to render figures inside the cell output.
# NOTE(review): this import sits mid-notebook; consider moving it next to the
# pandas/numpy imports in the first code cell so all dependencies are visible
# up front.
import matplotlib.pyplot as plt
%matplotlib inline



In [32]:

    
# Plot the Series values against its index labels. The trailing semicolon
# keeps the AxesSubplot repr out of the cell output so only the figure shows.
s.plot();









    Out[32]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fa8d44c4a90>



In [33]:

    
# Plot every column of the frame on one set of axes. The trailing semicolon
# keeps the AxesSubplot repr out of the cell output so only the figure shows.
df.plot();









    Out[33]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fa8d447f400>



In [34]:

    
# View a portion of a DataFrame or Series: build a boolean mask from the
# labels (string comparison, so this keeps the labels that sort after 'c').
s.loc[s.index > 'c']









    Out[34]:





d    NaN
e    4.0
f    2.0
dtype: float64



In [36]:

    
# Keep only the entries that hold a value: `notnull()` is True where the
# element is present, so the mask drops the NaN row.
s[s.notnull()]









    Out[36]:





a    1.0
b    3.0
c    4.0
e    4.0
f    2.0
dtype: float64



In [37]:

    
# Peek at the first five rows (five is also head()'s default)
df.head(n=5)



In [41]:

    
# select column: attribute-style access returns column 'A' as a Series
df.A









    Out[41]:





index1    1.041647
index2   -0.613589
index3   -0.523167
index4    0.711498
index5    2.005816
index6    1.127781
Name: A, dtype: float64



In [40]:

    
# select column with bracket notation: returns column 'B' as a Series
df['B']









    Out[40]:





index1   -0.281104
index2    0.232681
index3    0.249878
index4   -0.111896
index5   -0.111702
index6    0.606418
Name: B, dtype: float64



In [42]:

    
# select a row by its index label: the result is a Series keyed by column name
df.loc['index2']









    Out[42]:





A   -0.613589
B    0.232681
C   -0.130158
D   -1.382819
Name: index2, dtype: float64



In [43]:

    
# label and column intersection: row 'index3' restricted to columns 'A' and 'C'
df.loc['index3', ['A', 'C']]









    Out[43]:





A   -0.523167
C    0.422815
Name: index3, dtype: float64



In [ ]:

    
#

	A	B	C	D
index1	1.041647	-0.281104	-2.357189	-0.497115
index2	-0.613589	0.232681	-0.130158	-1.382819
index3	-0.523167	0.249878	0.422815	0.628948
index4	0.711498	-0.111896	-0.359935	0.050962
index5	2.005816	-0.111702	0.495628	-0.243297
index6	1.127781	0.606418	-0.658909	-0.280735