In [1]:
import pandas as pd
import numpy as np

In [2]:
# Build the sample objects used throughout the notebook.
# Seed NumPy's global RNG first so the random values are the same on every
# Restart & Run All. Outputs saved before the seed was added will differ
# until the notebook is re-executed.
np.random.seed(0)

# Eight consecutive daily timestamps starting 2018-01-01; used as df's row index.
index = pd.date_range('1/1/2018', periods=8)
# Series of 5 standard-normal draws labelled 'a'..'e'.
s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])
# 8x3 DataFrame of standard-normal draws: rows keyed by `index`, columns 'A', 'B', 'C'.
df = pd.DataFrame(np.random.randn(8, 3), index=index, columns=['A', 'B', 'C'])

In [3]:
# Dimensions of the DataFrame as a (rows, columns) tuple.
df.shape


Out[3]:
(8, 3)

In [4]:
# A Series is one-dimensional, so its shape is a 1-tuple holding its length.
s.shape


Out[4]:
(5,)

In [5]:
# Underlying data of the Series as a NumPy array (index labels are not included).
s.values


Out[5]:
array([ 1.10119179, -1.72636575,  0.13548559, -0.4199017 ,  2.48382293])

In [6]:
# Underlying data of the DataFrame as a 2-D NumPy array: one row per index
# entry, one column per DataFrame column.
df.values


Out[6]:
array([[ 1.16880771,  0.66447413, -1.10776953],
       [ 0.62798713, -1.09639665,  0.34518614],
       [-1.05039301, -1.16528949, -0.66624711],
       [-0.2156926 ,  1.29324589, -0.87698228],
       [ 0.50284891, -1.75970133, -0.40894607],
       [ 0.80022383,  0.85491544, -1.85570565],
       [-0.24169405,  0.60380649,  1.04439084],
       [ 0.35555654, -0.5501156 , -0.11779746]])

In [7]:
# df.values is a 2-D NumPy array, so it can be indexed like a matrix with
# [row, column]; this picks row 0, column 1 (the first value of column 'B').
df.values[0,1]


Out[7]:
0.66447412572744957

In [8]:
# pandas can speed up certain operations (for example df1 > df2 or df1 * df2)
# by handing them to the numexpr and bottleneck libraries.
# Both switches are already on by default; they are set here only to make
# that explicit.
for _option in ('compute.use_bottleneck', 'compute.use_numexpr'):
    pd.set_option(_option, True)

In [9]:
# Two small frames of standard-normal draws with different column counts:
# df1 is 3x2 and df2 is 3x3, so df1 has no column 2.
# They are used below to show how fill_value supplies a value for entries
# that one frame lacks once the two are aligned.
df1 = pd.DataFrame(np.random.standard_normal((3, 2)))
df2 = pd.DataFrame(np.random.standard_normal((3, 3)))

# Plain-text dump of both frames, one after the other.
print(df1, df2, sep='\n')


          0         1
0  0.787347 -0.229439
1  0.628489  0.173445
2 -0.605314  0.292630
          0         1         2
0  0.181332 -1.847278 -0.178296
1  0.174486 -0.241367 -0.409939
2  0.399726 -1.585013 -0.001370

In [10]:
# Flexible addition: where only one frame has an entry (df1 has no column 2),
# the missing side is taken to be fill_value (10) before adding, so column 2
# of the result is 10 + df2[2].
df1.add(df2, fill_value = 10)


Out[10]:
0 1 2
0 0.968680 -2.076717 9.821704
1 0.802974 -0.067922 9.590061
2 -0.205588 -1.292383 9.998630

In [11]:
# pandas provides flexible comparison methods that return boolean results:
# eq, ne, lt, gt, le, ge
# (equal, not equal, less than, greater than, less than or equal to,
#  greater than or equal to)

In [12]:
# Element-wise "df1 >= df2" after aligning the two frames on their labels.
# df1 has no column 2, so every comparison in that column comes out False.
df1.ge(df2)


Out[12]:
0 1 2
0 True True False
1 True True False
2 False True False

In [13]:
# Boolean reductions (empty, any, all, bool) condense a boolean frame.
# all() answers, for each column: is every value greater than 0.5?
df1.gt(0.5).all()


Out[13]:
0    False
1    False
dtype: bool

In [14]:
# any() answers, for each column: is at least one value greater than 0.5?
df1.gt(0.5).any()


Out[14]:
0     True
1    False
dtype: bool

In [15]:
# empty reports whether the frame has no elements at all; the frame of
# comparison results is not empty, so this is False.
df1.gt(0.5).empty


Out[15]:
False

In [16]:
# Chain any() twice to collapse the whole frame to a single boolean:
# is any value anywhere in df1 greater than 0.5?
df1.gt(0.5).any().any()


Out[16]:
True

In [17]:
# equals() checks whether two Series or DataFrames hold the same data.
# Unlike ==, it treats NaNs in the same location as equal.
df1.add(df1).equals(df1.mul(2))


Out[17]:
True

In [ ]: