notebook.community

Edit and run



In [1]:

    
import pandas as pd
import numpy as np



In [2]:

    
# Build the sample objects used by the rest of the notebook:
#   index - eight consecutive daily timestamps starting on 2018-01-01
#   s     - a Series of five random draws labelled 'a' through 'e'
#   df    - an 8x3 DataFrame of random draws, one row per date in `index`
index = pd.date_range(start='1/1/2018', periods=8)
s = pd.Series(
    data=np.random.randn(5),
    index=['a', 'b', 'c', 'd', 'e'],
)
df = pd.DataFrame(
    data=np.random.randn(8, 3),
    index=index,
    columns=['A', 'B', 'C'],
)



In [3]:

    
# Dimensions of the DataFrame as a (rows, columns) tuple.
df.shape









    Out[3]:





(8, 3)



In [4]:

    
# A Series is one-dimensional, so its shape is a one-element tuple: (length,).
s.shape









    Out[4]:





(5,)



In [5]:

    
# The data held by the Series as a NumPy array (the index labels are not included).
s.values









    Out[5]:





array([ 1.10119179, -1.72636575,  0.13548559, -0.4199017 ,  2.48382293])



In [6]:

    
# The data held by the DataFrame as a 2-D NumPy array: one row per index entry,
# one column per DataFrame column (row/column labels are not included).
df.values









    Out[6]:





array([[ 1.16880771,  0.66447413, -1.10776953],
       [ 0.62798713, -1.09639665,  0.34518614],
       [-1.05039301, -1.16528949, -0.66624711],
       [-0.2156926 ,  1.29324589, -0.87698228],
       [ 0.50284891, -1.75970133, -0.40894607],
       [ 0.80022383,  0.85491544, -1.85570565],
       [-0.24169405,  0.60380649,  1.04439084],
       [ 0.35555654, -0.5501156 , -0.11779746]])



In [7]:

    
# Because .values is a plain 2-D NumPy array, it can be indexed positionally
# like a matrix with [row, column]. Here: first row, second column.
df.values[0,1]









    Out[7]:





0.66447412572744957



In [8]:

    
# pandas can speed up some computations with two optional libraries:
#   - numexpr    : element-wise operations on large objects,
#                  e.g. df1 > df2, df1 * df2
#   - bottleneck : certain reductions, especially on data containing NaN
# Both switches already default to True; they are set explicitly here only
# to show where they live.
pd.set_option('compute.use_numexpr', True)
pd.set_option('compute.use_bottleneck', True)



In [9]:

    
# Two frames of different widths (3x2 and 3x3), used below to demonstrate
# fill_value. The "missing" values in question are the cells one frame
# lacks relative to the other (df1 has no column 2), not NaN entries
# already stored in the data.
df1 = pd.DataFrame(np.random.randn(3, 2))
df2 = pd.DataFrame(np.random.randn(3, 3))

# Show both frames, one after the other.
print(df1, df2, sep='\n')









    



          0         1
0  0.787347 -0.229439
1  0.628489  0.173445
2 -0.605314  0.292630
          0         1         2
0  0.181332 -1.847278 -0.178296
1  0.174486 -0.241367 -0.409939
2  0.399726 -1.585013 -0.001370



In [10]:

    
# Add the two frames after aligning them on their labels. Wherever one frame
# has no value for a cell that the other frame does have (here: column 2,
# which is absent from df1), fill_value=10 is used in its place.
df1.add(other=df2, fill_value=10)



In [11]:

    
# pandas provides flexible comparison methods that return boolean results:
# eq, ne, lt, gt, le, ge
# i.e. equal, not equal, less than, greater than,
# less than or equal to, greater than or equal to



In [12]:

    
# Element-wise "greater than or equal to". The method form aligns the two
# frames on their labels first; df1 has no column 2, so every comparison in
# that column involves a missing value and comes out False.
df1.ge(df2)









    Out[12]:







  
    
      
      0
      1
      2
    
  
  
    
      0
      True
      True
      False
    
    
      1
      True
      True
      False
    
    
      2
      False
      True
      False



In [13]:

    
# Boolean reductions (see also: any, empty, bool).
# all() collapses each column to one bool that is True only when every
# entry in that column is greater than 0.5.
df1.gt(0.5).all()









    Out[13]:





0    False
1    False
dtype: bool



In [14]:

    
# any() collapses each column to one bool that is True when at least one
# entry in that column is greater than 0.5.
df1.gt(0.5).any()









    Out[14]:





0     True
1    False
dtype: bool



In [15]:

    
# .empty reports whether the object has no elements at all; it does not look
# at the boolean values, so a non-empty mask gives False even if all entries
# are False.
df1.gt(0.5).empty









    Out[15]:





False



In [16]:

    
# Test whether at least one value anywhere in the frame is greater than 0.5:
# the first any() reduces each column, the second reduces those per-column
# results to a single bool.
df1.gt(0.5).any().any()









    Out[16]:





True



In [17]:

    
# To check whether two Series or DataFrames hold the same data, use equals().
# Unlike ==, it returns a single bool and treats NaNs in the same position
# as equal.
df1.add(df1).equals(df1.mul(2))









    Out[17]:





True



In [ ]:

	0	1	2
0	0.968680	-2.076717	9.821704
1	0.802974	-0.067922	9.590061
2	-0.205588	-1.292383	9.998630