In [1]:
    
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
    
In [2]:
    
# Series: a one-dimensional array whose entries carry labels
# General form: pd.Series(data, index=index)
s = pd.Series(
    data=np.random.randn(5),
    index=['a', 'b', 'c', 'd', 'e'],
)
s
    
    Out[2]:
In [3]:
    
# The labels of a Series are available through its .index attribute
s.index
    
    Out[3]:
In [4]:
    
# Index labels do not have to be unique: 'a' is used twice here
duplicate_index = pd.Series(
    data=np.random.randn(4),
    index=['a', 'a', 'c', 'd'],
)
duplicate_index
    
    Out[4]:
In [5]:
    
# Looking up a label that occurs more than once returns every matching entry
duplicate_index['a']
    
    Out[5]:
In [6]:
    
# A dict can be passed to pd.Series; the keys of the dict become the index.
# A Series is ndarray-like, so it can be passed to most NumPy functions.
# Slicing a Series slices its index together with its values.
# Access by integer position goes through .iloc: on a Series with string
# labels a bare s[0] is deprecated and is slated to become a label lookup.
s.iloc[0]
    
    Out[6]:
In [7]:
    
# First three entries, selected by position
s.iloc[:3]
    
    Out[7]:
In [8]:
    
# Display the full Series for comparison with the selections above
s
    
    Out[8]:
In [9]:
    
# A Series also behaves like a dict keyed by its index labels,
# so dict-style get/set operations work on it.
# Assigning to a label that does not exist yet adds a new entry.
s.loc['f'] = 10
    
In [10]:
    
# Display s; it now contains the value stored under the label 'f'
s
    
    Out[10]:
In [12]:
    
# A Series can carry a name, supplied through the `name` argument
s = pd.Series(data=np.random.randn(6), name='Something')
s
    
    Out[12]:
In [13]:
    
# The name given at construction is available as the .name attribute
s.name
    
    Out[13]:
In [41]:
    
# Build a DataFrame from a dict of Series. The row index is the union of the
# Series indexes, so column 'one' has no value for row 'd' (shown as NaN).
df = pd.DataFrame(
    data={
        'one': pd.Series(data=[1.0, 2.0, 3.0], index=['a', 'b', 'c']),
        'two': pd.Series(data=[1.0, 2.0, 3.0, 4.0], index=['a', 'b', 'c', 'd']),
    }
)
df
    
    Out[41]:
In [16]:
    
# Row labels of the DataFrame
df.index
    
    Out[16]:
In [17]:
    
# Column labels of the DataFrame
df.columns
    
    Out[17]:
In [21]:
    
# DataFrame does not function in the same way as numpy 2-D array
# Treat DataFrame as a dict of like-indexed Series objects
    
In [22]:
    
# Selecting a single column by name returns that column as a Series
df['one']
    
    Out[22]:
In [24]:
    
# Two steps: pick the column (a Series), then pick the row label inside it
df['one'].loc['a']
    
    Out[24]:
In [26]:
    
# New column holding the element-wise sum of the two existing columns
df['three'] = df['one'].add(df['two'])
df
    
    Out[26]:
In [28]:
    
# Boolean column: True where the value in 'one' is greater than 2
df['flag'] = df['one'].gt(2)
df
    
    Out[28]:
In [29]:
    
# Columns can be removed with `del`, the same way a key is removed from a dict
del df['flag']
df
    
    Out[29]:
In [30]:
    
# pop removes the column from df and returns it as a Series
three = df.pop('three')
three
    
    Out[30]:
In [31]:
    
# You can do almost all the operations of a dictionary, treating
# column names as the keys and complete columns as the values
# Remember df['name'] selects the 'name' Series in the df
# That Series can then be treated much like a simple ndarray.
# To access an element of df['name'], simply use df['name']['index']
    
In [32]:
    
# Display df; after the del and pop above only the original columns remain
df
    
    Out[32]:
In [33]:
    
# First entry of column 'one', selected by position. .iloc is required for
# positional access because the index of df holds string labels: a bare [0]
# on such a Series is deprecated and is slated to become a label lookup.
df['one'].iloc[0]
    
    Out[33]:
In [35]:
    
# assign() returns a new frame with an extra column derived from existing
# columns; df itself is not modified
df.assign(ratio=df['one'].div(df['two']))
    
    Out[35]:
In [36]:
    
# The derived column may also be the result of a comparison
df.assign(check=df['one'].gt(2))
    
    Out[36]:
In [45]:
    
# Apply a condition while assigning: filter the rows first, then derive the
# new column. The lambda receives the already-filtered frame, so the column
# is filled only with values from rows that meet the condition.
(
    df
    .query('two > 2')
    .assign(eg1=lambda filtered: filtered['two'])
)
    
    Out[45]:
In [46]:
    
# Several conditions can be combined inside a single query string
(
    df
    .query('two > 2 & one != 3')
    .assign(eg2=lambda filtered: filtered['two'])
)
    
    Out[46]:
In [ ]:
    
# Indexing rules
# df['col_name'] = Return the complete Series i.e. column
# df.loc['label'] = Returns the entire row in the form of a Series
# df.iloc[label_in_integer_locations] = same as above
# df[5:10] = Slice the rows
# df[bool_vec] = Select rows by boolean vector
    
In [50]:
    
# Display df as a reference for the row lookups that follow
df
    
    Out[50]:
In [51]:
    
# Select the row labelled 'b'; the row is returned as a Series
df.loc['b']
    
    Out[51]:
In [52]:
    
# Select the row at integer position 1 (the second row)
df.iloc[1]
    
    Out[52]:
In [56]:
    
# Alignment and arithmetic
# Two frames of different shape: 10 rows x columns A-D and 7 rows x columns A-C
df1 = pd.DataFrame(data=np.random.randn(10, 4), columns=['A', 'B', 'C', 'D'])
df2 = pd.DataFrame(data=np.random.randn(7, 3), columns=['A', 'B', 'C'])
    
In [57]:
    
# Display the first frame (10 rows, columns A-D)
df1
    
    Out[57]:
In [58]:
    
# Display the second frame (7 rows, columns A-C)
df2
    
    Out[58]:
In [59]:
    
# Adding two DataFrames aligns them on both row and column labels.
# Any label present in only one of the frames yields NaN in the result.
df1.add(df2)
    
    Out[59]:
In [64]:
    
# A scalar is applied element-wise to the aligned sum of the two frames.
# (When a Series is combined with a DataFrame, pandas aligns the index of
# the Series with the columns of the DataFrame.)
2 * (df1 + df2)
    
    Out[64]:
In [70]:
    
# The dtype argument sets the element type of every column at construction
df = pd.DataFrame(data={'a': [1, 0, 1], 'b': [0, 1, 1]}, dtype=np.float32)
df
    
    Out[70]:
In [76]:
    
# dtype of each column
df.dtypes
    
    Out[76]:
In [77]:
    
# Transpose: rows become columns and columns become rows
df.T
    
    Out[77]:
In [81]:
    
# Element-wise NumPy functions can be applied directly to a DataFrame,
# provided that all of its elements are numeric.
# np.nan is the supported spelling; the np.NaN alias was removed in NumPy 2.0.
df = pd.DataFrame({'one':[np.nan, 1, 2]})
print(df)
# NaN propagates through the computation: np.log leaves the missing entry
# as NaN in the result instead of raising an error.
np.log(df)
    
    
    Out[81]:
In [87]:
    
# Rebuild the two-column example frame; info() can then be used to get a
# summary of a DataFrame such as this one
df = pd.DataFrame(
    data={
        'one': pd.Series(data=[1.0, 2.0, 3.0], index=['a', 'b', 'c']),
        'two': pd.Series(data=[1.0, 2.0, 3.0, 4.0], index=['a', 'b', 'c', 'd']),
    }
)
df
    
    Out[87]:
In [88]:
    
# Print a concise summary of df (index, columns, dtypes, non-null counts)
df.info()
    
    
In [ ]:
    
# Display settings; set_option changes them for the rest of the session
# Width, in characters, available for printing on each line
pd.set_option('display.width', 40)
# Maximum number of characters shown for the content of a single column
pd.set_option('display.max_colwidth', 30)
    
In [89]:
    
# For making 3-D arrays 
# Axis names
# 1) items -> axis =0
# 2) major_axis -> axis = 1(index)
# 3) minor_axis -> axis = 2(columns)
    
In [90]:
    
# pd.Panel was removed from pandas (0.25+), so the 3-D block of data is held
# in a DataFrame with a two-level row index instead: (items, major_axis)
# label the rows and minor_axis labels the columns.
# reshape(-1, 4) flattens the first two axes in the same order that
# MultiIndex.from_product enumerates (item, date) pairs.
wp = pd.DataFrame(
    np.random.randn(2, 5, 4).reshape(-1, 4),
    index=pd.MultiIndex.from_product(
        [['Item1', 'Item2'], pd.date_range('9/1/2018', periods = 5)],
        names=['items', 'major_axis'],
    ),
    columns=pd.Index(['A', 'B', 'C', 'D'], name='minor_axis'),
)
wp
    
    Out[90]:
In [92]:
    
# Creating from a dict of DataFrames
# pd.Panel was removed from pandas (0.25+); pd.concat stacks the frames into
# one DataFrame whose outer row level holds the dict keys. A column missing
# from one frame (Item2 has no column 2) is filled with NaN.
data = {
    'Item1': pd.DataFrame(np.random.randn(4,3)),
    'Item2': pd.DataFrame(np.random.randn(4,2)),
}
wp = pd.concat(data, names=['items', 'major_axis'])
wp
    
    Out[92]: