In [1]:
# Report the version string of the Python interpreter running this kernel.
from platform import python_version
python_version()
Out[1]:
In [27]:
# Show the path of the interpreter executable that backs this kernel.
import sys
sys.executable
Out[27]:
In [2]:
import pandas as pd
import numpy as np
# Display the installed pandas and NumPy versions as a tuple.
pd.__version__, np.__version__
Out[2]:
In [3]:
# Small sample frame used by the renaming examples; the column order is
# pinned explicitly to name, age.
df = pd.DataFrame(
    {
        'name': ['alice', 'bob', 'charlie'],
        'age': [25, 26, 27],
    },
    columns=['name', 'age'],
)
df
Out[3]:
In [4]:
# Rename a single column; the result is only displayed, df itself is not reassigned.
df.rename({'name': 'person_name'}, axis='columns')
Out[4]:
In [5]:
# Rename several columns in one call by passing an old-name -> new-name mapping.
df.rename(
    columns={
        'name': 'person_name',
        'age': 'age_in_years',
    }
)
Out[5]:
In [6]:
# Display df again: the rename calls above were not assigned back, so df
# still carries its original column names.
df
Out[6]:
In [7]:
# Build a copy of df whose column labels are all upper-cased; df is untouched.
df2 = df.rename(columns=str.upper)
df2
Out[7]:
In [8]:
# Overwrite the 'name' column on a copy with upper-cased values, element by element.
upper_names = df['name'].map(str.upper)
df2 = df.copy()
df2['name'] = upper_names
df2
Out[8]:
In [21]:
# Pair each text with a candidate word; a later cell checks membership row by row.
df_multiple = pd.DataFrame(
    {
        'text': ['foo bar', 'bar baz', 'baz quux', 'foo quux'],
        'word': ['foo', 'foo', 'bar', 'foo'],
    }
)
df_multiple.loc[:, ['word', 'text']]
Out[21]:
In [22]:
def _word_in_text(row):
    """Return True when the row's 'word' is a substring of its 'text'."""
    return row['word'] in row['text']

# axis=1 hands each row to the helper, so it can read several columns at once.
df_multiple['word_is_in_text'] = df_multiple[['text', 'word']].apply(_word_in_text, axis=1)
df_multiple[['word', 'text', 'word_is_in_text']]
Out[22]:
In [9]:
# Derive a new column by mapping a per-element function over 'age'.
doubled_ages = df['age'].map(lambda years: years * 2)
df2 = df.copy()
df2['age_times_two'] = doubled_ages
df2
Out[9]:
In [10]:
# Same derived column as the map-based cell, computed with column arithmetic.
df2 = df.assign(age_times_two=df['age'] * 2)
df2
Out[10]:
In [11]:
# Add an upper-cased version of 'name' as a separate column on a copy of df.
df2 = df.assign(name_uppercase=df['name'].map(str.upper))
df2
Out[11]:
In [12]:
import pandas as pd
# Frame with deliberate gaps: both np.nan and None are counted by isnull().
df = pd.DataFrame(
    {
        'name': ['alice', 'bob', 'charlie'],
        'age': [25, 26, np.nan],
        'state': ['ak', np.nan, None],
    }
)
# Print the number of missing entries for each column, one per line.
for column in ('name', 'age', 'state'):
    print(df[column].isnull().sum())
In [13]:
import pandas as pd
df = pd.DataFrame(
    {
        'name': ['alice', 'bob', 'charlie'],
        'age': [25, 26, 27],
        'state': ['ak', 'ny', 'dc'],
    }
)
# Print the column labels as an array.
column_labels = df.columns.values
print(column_labels)
In [14]:
import pandas as pd
df = pd.DataFrame(
    {
        'name': ['alice', 'bob', 'charlie'],
        'age': [25, 26, 27],
        'state': ['ak', 'ny', 'dc'],
    }
)
# Print how many columns the frame has.
print(len(df.columns))
In [15]:
import pandas as pd
# Three-column frame reused by the column selection / drop / insert cells.
df = pd.DataFrame(
    {
        'name': ['alice', 'bob', 'charlie'],
        'age': [25, 26, 27],
        'state': ['ak', 'ny', 'dc'],
    }
)
df
Out[15]:
In [16]:
# Select columns in an explicit order by listing their labels.
wanted_columns = ['name', 'age', 'state']
df2 = df[wanted_columns]
df2
Out[16]:
In [17]:
# Drop the 'age' column; drop() returns a new frame, so df keeps all its columns.
df2 = df.drop(columns=['age'])
df2
Out[17]:
In [18]:
# Drop several columns at once by passing a list of labels.
df2 = df.drop(columns=['age', 'name'])
df2
Out[18]:
In [19]:
# Replace the 'state' column with a new Series (values are aligned on the index).
states = pd.Series(['dc', 'ca', 'ny'])
df2 = df.assign(state=states)
df2
Out[19]:
In [20]:
# Report which of the candidate labels exist as columns of the frame.
df2 = df.copy()
candidate_names = ['name', 'gender', 'age']
existing_columns = df2.columns.values
for name in candidate_names:
    if name not in existing_columns:
        continue
    print('"{}" is a column name'.format(name))
In [21]:
# Insert a new 'gender' column at position 1 (right after the first column).
col = pd.Series(['female', 'male', 'male'])
df2 = df.copy()
df2.insert(loc=1, column='gender', value=col)
df2
Out[21]:
In [22]:
import numpy as np
# Print the dtype of 'age' before any cast and again after each cast:
# first to str, then to an unsigned 8-bit integer.
df2 = df.copy()
print(df2['age'].dtype)
for target_dtype in (str, np.uint8):
    df2['age'] = df2['age'].astype(target_dtype)
    print(df2['age'].dtype)
In [23]:
# Dates are stored as plain strings here; a later cell parses them.
df3 = pd.DataFrame(
    {
        'name': ['alice', 'bob', 'charlie'],
        'date_of_birth': ['10/25/2005', '10/29/2002', '01/01/2001'],
    },
    columns=['name', 'date_of_birth'],
)
df3
Out[23]:
In [24]:
# Parse the date strings, letting pandas infer the format (no format= given).
parsed_birth_dates = pd.to_datetime(df3['date_of_birth'])
df3['date_of_birth'] = parsed_birth_dates
df3
Out[24]:
In [25]:
# Same layout as the earlier df3, but the strings are written day/month/year.
df3 = pd.DataFrame(
    {
        'name': ['alice', 'bob', 'charlie'],
        'date_of_birth': ['27/05/2001', '16/02/1999', '25/09/1998'],
    },
    columns=['name', 'date_of_birth'],
)
df3
Out[25]:
In [26]:
# Rebuild the day/month/year frame, then parse with an explicit format so the
# day and month fields cannot be swapped.
df3 = pd.DataFrame(
    {
        'name': ['alice', 'bob', 'charlie'],
        'date_of_birth': ['27/05/2001', '16/02/1999', '25/09/1998'],
    },
    columns=['name', 'date_of_birth'],
)
df3['date_of_birth'] = pd.to_datetime(df3['date_of_birth'], format='%d/%m/%Y')
df3
Out[26]:
In [48]:
# Sample frame for the Series.map example.
df4 = pd.DataFrame(
    {
        'name': ['alice', 'bob', 'charlie', 'david'],
        'age': [25, 26, 27, 22],
    },
    columns=['name', 'age'],
)
df4
Out[48]:
In [49]:
# Series.map calls the function once per element of the 'name' column.
df4['name_uppercase'] = df4['name'].map(str.upper)
df4
Out[49]:
Series.apply
In [51]:
# Sample frame for the Series.apply example.
df5 = pd.DataFrame(
    {
        'name': ['alice', 'bob', 'charlie', 'david'],
        'age': [25, 26, 27, 22],
    },
    columns=['name', 'age'],
)
df5
Out[51]:
In [52]:
# Series.apply with a scalar function also runs once per element.
df5['name_uppercase'] = df5['name'].apply(str.upper)
df5
Out[52]:
In [64]:
# Sample frame for the DataFrame.apply example.
df6 = pd.DataFrame(
    {
        'name': ['alice', 'bob', 'charlie', 'david'],
        'age': [25, 26, 27, 22],
    },
    columns=['name', 'age'],
)
df6
Out[64]:
In [66]:
def _double_column(arr):
    """Double a whole column with one NumPy call."""
    return np.multiply(arr, 2)

# DataFrame.apply (default axis) passes each column to the helper; the
# selection [['age']] is a one-column frame, so the helper runs once.
df6['age_times_2'] = df6[['age']].apply(_double_column)
df6
Out[66]:
In [68]:
# Sample frame with two numeric columns; the column order is pinned explicitly.
df7 = pd.DataFrame(
    {
        'name': ['alice', 'bob', 'charlie'],
        'age': [25, 26, 27],
        'number_of_children': [1, 3, 4],
    },
    columns=['name', 'age', 'number_of_children'],
)
# Display the frame built in this cell. The original displayed df4, which
# shows a different frame and raises NameError on a fresh kernel.
df7
Out[68]:
In [41]:
# Selecting with a list of labels keeps a DataFrame, so apply() hands back
# a new single-column DataFrame.
df4[['age']].apply(lambda column: column * 2)
Out[41]:
In [39]:
# Selecting with a single label yields a Series, so map() hands back a new Series.
df4['age'].map(lambda value: value * 2)
Out[39]:
In [ ]:
In [71]:
# Large frame of standard-normal samples used for the timing comparison below.
# A dedicated, seeded RandomState makes the data identical on every
# Restart & Run All without touching NumPy's global random state.
RANDOM_SEED = 42
df_numeric = pd.DataFrame({
    'x': np.random.RandomState(RANDOM_SEED).normal(loc=0.0, scale=1.0, size=10000000),
})
df_numeric.head()
Out[71]:
In [78]:
%%time
def multiply_by_two(arr):
    """Return ``arr`` with every value doubled, using one NumPy call."""
    doubled = np.multiply(arr, 2)
    return doubled

# DataFrame.apply passes the 'x' column to the function in a single call.
df_numeric['2x_apply'] = df_numeric[['x']].apply(multiply_by_two)
In [79]:
%%time
def multiply_by_two_map(x):
    """Return twice the scalar ``x``; Series.map calls this once per element."""
    return x*2

# Pass the helper defined in this cell. The original passed multiply_by_two
# from the previous cell, leaving multiply_by_two_map unused and making this
# cell depend on that earlier definition.
df_numeric['2x_map'] = df_numeric['x'].map(multiply_by_two_map)
In [80]:
# Preview the first rows, now including the '2x_apply' and '2x_map' columns
# added by the two timing cells.
df_numeric.head()
Out[80]: