In [1]:
import pandas as pd
In [2]:
# Read the sample CSV, then trim it to the first three rows.
df = pd.read_csv('data/src/sample_pandas_normal.csv')
df = df.head(3)
print(df)
In [3]:
# A list of booleans (one per row) selects the rows flagged True.
mask = [True, False, True]
df_mask = df.loc[mask]
print(df_mask)
In [4]:
# Element-wise comparison: one True/False result per row.
print(
    df['state'] == 'CA'
)
In [5]:
# Use the comparison result as a row filter.
print(df.loc[df['state'] == 'CA'])
In [6]:
# Test each name for the substring 'li'.
print(df['name'].str.contains(pat='li'))
In [7]:
# Keep only the rows whose name contains 'li'.
print(df.loc[df['name'].str.contains('li')])
In [8]:
# Work on an independent copy so `df` itself is left untouched, then
# overwrite the cell at row 2, column 0 with NaN to simulate a missing value.
df_nan = df.copy(deep=True)
df_nan.iloc[2, 0] = float('nan')
print(df_nan)
In [9]:
# Same substring test, now on the frame that holds a missing value.
print(df_nan['name'].str.contains(pat='li'))
In [10]:
# print(df_nan[df_nan['name'].str.contains('li')])
# ValueError: cannot index with vector containing NA / NaN values
# (the mask itself contains NaN, so it cannot be used directly as a row filter)
In [11]:
# na=False: report missing entries as non-matching.
print(df_nan['name'].str.contains(pat='li', na=False))
In [12]:
# na=True: report missing entries as matching.
print(df_nan['name'].str.contains(pat='li', na=True))
In [13]:
# Matching is case-sensitive unless told otherwise, so 'LI' is searched as-is.
print(df['name'].str.contains(pat='LI'))
In [14]:
# case=False: ignore letter case when matching.
print(df['name'].str.contains(pat='LI', case=False))
In [15]:
# The pattern is treated as a regular expression by default.
print(df['name'].str.contains(pat='i.*e'))
In [16]:
# regex=False: search for the literal text 'i.*e' instead of a pattern.
print(df['name'].str.contains(pat='i.*e', regex=False))
In [17]:
# Copy the frame and append a literal '?' to the value at row 2, column 0.
df_q = df.copy()
df_q.iloc[2, 0] = df_q.iloc[2, 0] + '?'
print(df_q)
In [18]:
# print(df_q['name'].str.contains('?'))
# error: nothing to repeat at position 0
# ('?' is a regex quantifier, so on its own it is not a valid pattern)
In [19]:
# regex=False lets '?' be searched as a plain character.
print(df_q['name'].str.contains(pat='?', regex=False))
In [20]:
# '?' is a regex metacharacter, so it is escaped to match a literal question
# mark. A raw string is used because '\?' is not a valid Python escape
# sequence: in a normal string literal it only works through a deprecated
# fallback that emits a warning.
print(df_q['name'].str.contains(r'\?'))
In [21]:
# Test whether each name ends with 'e'.
print(df['name'].str.endswith(pat='e'))
In [22]:
# Keep only the rows whose name ends with 'e'.
print(df.loc[df['name'].str.endswith('e')])
In [23]:
# Test whether each name starts with 'B'.
print(df['name'].str.startswith(pat='B'))
In [24]:
# Keep only the rows whose name starts with 'B'.
print(df.loc[df['name'].str.startswith('B')])
In [25]:
# str.match applies the regex from the start of each string.
print(df['name'].str.match(pat='.*i.*e'))
In [26]:
# Keep only the rows whose name matches the regex from its first character.
print(df.loc[df['name'].str.match('.*i.*e')])
In [27]:
# The pattern only has to match from the start; it need not reach the end of the string.
print(df['name'].str.match(pat='.*i'))
In [28]:
# Without a leading '.*', the name itself must begin with 'i' to match.
print(df['name'].str.match(pat='i.*e'))