In [1]:
import pandas as pd
import numpy as np
In [2]:
# Build the sample objects used by the rest of the notebook.
# Seed NumPy's global RNG first so that "Restart Kernel -> Run All" draws the
# same random numbers, and therefore shows the same outputs, on every run.
np.random.seed(0)
index = pd.date_range('1/1/2018', periods=8)  # 8 timestamps used as df's row labels
s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])
df = pd.DataFrame(np.random.randn(8, 3), index=index, columns=['A', 'B', 'C'])
In [3]:
# (rows, columns) of the frame; df was built from an 8x3 array, so this is (8, 3).
df.shape
Out[3]:
In [4]:
# A Series is one-dimensional, so its shape is a 1-tuple; s holds 5 values, giving (5,).
s.shape
Out[4]:
In [5]:
# The Series data as a NumPy ndarray. `to_numpy()` is the accessor the pandas
# documentation recommends over the older `.values` attribute.
s.to_numpy()
Out[5]:
In [6]:
# The whole frame as a 2-D NumPy ndarray (rows x columns), obtained through the
# recommended `to_numpy()` accessor rather than the legacy `.values` attribute.
df.to_numpy()
Out[6]:
In [7]:
# The NumPy representation can be indexed like a 2-D matrix:
# [0, 1] is the first row, second column (column 'B').
df.to_numpy()[0, 1]
Out[7]:
In [8]:
# pandas can speed up certain operations (for example df1 > df2 or df1 * df2)
# by delegating to the optional numexpr and bottleneck libraries.
# Both switches are already on by default; they are set here only to make the
# configuration explicit.
for option_name in ('compute.use_bottleneck', 'compute.use_numexpr'):
    pd.set_option(option_name, True)
In [9]:
# Two frames whose shapes differ on purpose: df1 is 3x2 and df2 is 3x3, so df2
# carries a column (label 2) that df1 does not have. Cells that exist in only
# one operand are what `fill_value` substitutes for in the arithmetic methods.
df1 = pd.DataFrame(np.random.randn(3, 2))
df2 = pd.DataFrame(np.random.randn(3, 3))
print(df1, df2, sep='\n')
In [10]:
# Add the frames after aligning them on their labels. df1 has no column 2, so
# those cells are missing on the df1 side; fill_value=10 stands in for them
# before the addition, which makes that column equal to 10 + df2[2].
# A cell missing in both operands would still come out as NaN.
df1.add(other=df2, fill_value=10)
Out[10]:
In [11]:
# pandas provides flexible comparison methods that return boolean results:
# eq, ne, lt, gt, le, ge
# i.e. equal, not equal, less than, greater than,
# less than or equal to, greater than or equal to
In [12]:
# Element-wise "df1 >= df2". The method form aligns the operands on their
# labels first; df2 has a column (label 2) that df1 lacks, so that column is
# compared against missing values and comes out False.
df1.ge(df2)
Out[12]:
In [13]:
# Boolean reductions summarise a boolean frame: empty, any, all, bool.
# Here, per column: is every value of df1 greater than 0.5?
df1.gt(0.5).all()
Out[13]:
In [14]:
# Per column: is at least one value of df1 greater than 0.5?
df1.gt(0.5).any()
Out[14]:
In [15]:
# `empty` is a property, not a reduction over the values: it is True only when
# the frame has no elements at all, so for this 3x2 boolean mask it is False
# regardless of what the comparison produced.
df1.gt(0.5).empty
Out[15]:
In [16]:
# Reduce twice (over each column, then over the resulting Series) to get a
# single boolean: does df1 contain any value greater than 0.5?
df1.gt(0.5).any().any()
Out[16]:
In [17]:
# Use `equals` to check whether two Series or DataFrames hold the same data.
# With this method, NaNs in the same position are treated as equal.
df1.add(df1).equals(df1.mul(2))
Out[17]:
In [ ]: