In [1]:
import pandas as pd
import numpy as np
# Show the versions of the imported pandas and NumPy modules as a tuple.
(pd.__version__, np.__version__)
Out[1]:
In [2]:
# Small example frame: one row per person, with a full name and an age.
df = pd.DataFrame(
    data={
        'name': [
            'alice smith',
            'bob jones',
            'charlie joneson',
            'daisy white',
        ],
        'age': [25, 20, 30, 35],
    }
)
In [3]:
# Bare last expression: the notebook renders the DataFrame as the cell output.
df
Out[3]:
In [4]:
# Literal substring match (regex=False): keep the rows whose name contains 'jones'.
df.loc[df['name'].str.contains('jones', regex=False)]
Out[4]:
In [5]:
# Regex match: keep names that start with 'b' or contain 'da' anywhere
# (the '^' anchor binds only to the first alternative).
df.loc[df['name'].str.contains('^b|da', regex=True)]
Out[5]:
In [6]:
# Example frame with the first and last name stored in separate columns.
df = pd.DataFrame(
    data={
        'first_name': ['alice', 'bob', 'charlie', 'daisy'],
        'last_name': ['smith', 'jones', 'joneson', 'white'],
        'age': [25, 20, 30, 35],
    }
)
df
Out[6]:
In [7]:
# Element-wise concatenation of the two string columns with no separator
# between them (e.g. 'alice' and 'smith' become 'alicesmith').
df['full_name'] = df['first_name'].str.cat(df['last_name'])
df
Out[7]:
In [8]:
# Element-wise concatenation of the two string columns, joined by one space.
df['full_name'] = df['first_name'].str.cat(df['last_name'], sep=' ')
df
Out[8]:
In [9]:
# Rebuild the frame that stores the full name in a single 'name' column.
df = pd.DataFrame(
    data={
        'name': [
            'alice smith',
            'bob jones',
            'charlie joneson',
            'daisy white',
        ],
        'age': [25, 20, 30, 35],
    }
)
df
Out[9]:
In [10]:
def split_name(name):
    """Split a full name into first and last name at the first space.

    Parameters
    ----------
    name : str
        Full name such as ``'alice smith'``.

    Returns
    -------
    pd.Series
        Series with the keys ``'first_name'`` and ``'last_name'``.
        Everything after the first space goes to ``'last_name'``; it is an
        empty string when ``name`` contains no space.
    """
    # partition() always yields exactly three parts, so names with zero or
    # several spaces no longer break the two-value unpacking that
    # name.split(' ') required (it raised ValueError in those cases).
    first_name, _, last_name = name.partition(' ')
    return pd.Series({
        'first_name': first_name,
        'last_name': last_name
    })
# Run split_name on every entry of the 'name' column; since it returns a
# Series per entry, apply() collects the results into a DataFrame with
# 'first_name' and 'last_name' columns.
df_new = df['name'].apply(split_name)
# Put the new columns side by side with the original ones.
df_final = pd.concat(objs=[df, df_new], axis='columns')
df_final
Out[10]:
In [11]:
# Small Series of lower-case strings for trying out the .str accessor.
s = pd.Series(data=['foo', 'bar', 'baz'])
In [12]:
# Upper-case the first character of each string and lower-case the rest.
s.str.capitalize()
Out[12]: