In [1]:
import pandas as pd
import numpy as np

In [2]:
# Show the pandas and numpy versions this notebook was executed with.
pd.__version__, np.__version__


Out[2]:
('1.0.3', '1.18.2')

In [3]:
# Classic approach, step 1 of 2: build the frame and bind it to a name
# so that a later cell can filter it. The last row has no country.
df = pd.DataFrame(
    data=[('john', 'USA'), ('david', 'UK'), ('anna', np.nan)],
    columns=['name', 'country'],
)
df


Out[3]:
name country
0 john USA
1 david UK
2 anna NaN

In [4]:
# Classic approach, step 2 of 2: select the rows whose country equals 'USA'
# through a boolean mask on the previously bound frame.
df.loc[df['country'].eq('USA')]


Out[4]:
name country
0 john USA

In [5]:
# Same filter as a single expression: the frame is built and queried
# in one chain, without ever being bound to a name.
(
    pd.DataFrame(
        data=[('john', 'USA'), ('david', 'UK'), ('anna', np.nan)],
        columns=['name', 'country'],
    )
    .query("country == 'USA'")
)


Out[5]:
name country
0 john USA

Query where a column is null


In [ ]:
# Rebuild the sample frame; the last row has no country.
df = pd.DataFrame.from_records(
    [('john', 'USA'), ('david', 'UK'), ('anna', np.nan)],
    columns=['name', 'country'],
)

# Column methods such as isnull() can be called inside the query string.
df.query('country.isnull()')

Query with a Python variable


In [6]:
import pandas as pd
import numpy as np

# Sample frame built row by row; 'age' is the column compared below.
df = pd.DataFrame([
    {'name': 'john', 'country': 'USA', 'age': 23},
    {'name': 'david', 'country': 'UK', 'age': 45},
    {'name': 'anna', 'country': np.nan, 'age': 45},
])

target_age = 45

# The @ prefix makes query() read the name from the surrounding Python
# scope instead of treating it as a column.
df.query('age == @target_age')


Out[6]:
name country age
1 david UK 45
2 anna NaN 45

OR operator


In [34]:
import pandas as pd

# Sample frame for the OR example: three people, no missing values.
df = pd.DataFrame(
    data=[('john', 'USA', 23), ('david', 'UK', 45), ('anna', 'USA', 45)],
    columns=['name', 'country', 'age'],
)
df


Out[34]:
name country age
0 john USA 23
1 david UK 45
2 anna USA 45

In [23]:
# `or` keeps a row when at least one of the two parenthesised conditions holds.
df.query("(name=='john') or (country=='UK')")


Out[23]:
name country age
0 john USA 23
1 david UK 45

AND operator


In [35]:
import pandas as pd

# Sample frame for the AND example, built column by column.
df = pd.DataFrame(dict(
    name=['john', 'david', 'anna'],
    country=['USA', 'UK', 'USA'],
    age=[23, 45, 45],
))
df


Out[35]:
name country age
0 john USA 23
1 david UK 45
2 anna USA 45

In [36]:
# `and` keeps a row only when both parenthesised conditions hold.
df.query("(country=='USA') and (age==23)")


Out[36]:
name country age
0 john USA 23

Column is in array


In [37]:
import pandas as pd

# Sample frame for the membership example, built row by row.
df = pd.DataFrame([
    {'name': 'john', 'country': 'USA', 'age': 23},
    {'name': 'david', 'country': 'UK', 'age': 45},
    {'name': 'anna', 'country': 'USA', 'age': 45},
])
df


Out[37]:
name country age
0 john USA 23
1 david UK 45
2 anna USA 45

In [38]:
# Values to match against the 'name' column.
names_array = ['john','anna']

# `in` combined with an @-referenced Python list keeps the rows whose
# name appears in that list.
df.query('name in @names_array')


Out[38]:
name country age
0 john USA 23
2 anna USA 45

Column is not in array


In [32]:
import pandas as pd

# Sample frame for the exclusion example, built from row tuples.
df = pd.DataFrame.from_records(
    [('john', 'USA', 23), ('david', 'UK', 45), ('anna', 'USA', 45)],
    columns=['name', 'country', 'age'],
)
df


Out[32]:
name country age
0 john USA 23
1 david UK 45
2 anna USA 45

In [33]:
# Names to exclude from the result.
invalid_array = ['anna']

# `not in` combined with an @-referenced Python list drops the rows whose
# name appears in that list.
df.query('name not in @invalid_array')


Out[33]:
name country age
0 john USA 23
1 david UK 45

Column name with spaces


In [ ]:
import pandas as pd

# Sample frame whose middle column name contains spaces.
df = pd.DataFrame({
    'name':['john','david','anna'],
    'country of birth':['USA','UK', 'USA'],
    'age':[23,45,45]
})

# A column name that is not a valid Python identifier has to be wrapped in
# backticks inside the query string (supported since pandas 0.25).
born_in_usa = df.query("`country of birth` == 'USA'")
born_in_usa

Where column is null


In [9]:
import pandas as pd
import numpy as np

# Sample frame built row by row; the last row has no country.
df = pd.DataFrame([
    {'name': 'john', 'country': 'USA'},
    {'name': 'david', 'country': 'UK'},
    {'name': 'anna', 'country': np.nan},
])

df


Out[9]:
name country
0 john USA
1 david UK
2 anna NaN

In [10]:
# Calling isnull() on the column inside the query string keeps only the
# rows where 'country' is missing.
df.query('country.isnull()')


Out[10]:
name country
2 anna NaN

Where column is not null


In [11]:
import pandas as pd
import numpy as np

# Sample frame built from row tuples; the last row has no country.
df = pd.DataFrame(
    data=[('john', 'USA'), ('david', 'UK'), ('anna', np.nan)],
    columns=['name', 'country'],
)

df


Out[11]:
name country
0 john USA
1 david UK
2 anna NaN

In [12]:
# Calling notnull() on the column inside the query string keeps only the
# rows where 'country' has a value.
df.query('country.notnull()')


Out[12]:
name country
0 john USA
1 david UK

In [ ]: