In [52]:
from pandas import Series, DataFrame
import pandas as pd
In [70]:
# Input CSV, read below with pd.read_csv (semicolon-separated).
# NOTE(review): both paths are absolute and machine-specific, so the notebook
# only runs where this directory layout exists.
f = r'/home/hase/Documents/ZHAW/InfoEng/Lectures/Scripting/data/titanic3_test.csv'
# Output CSV, written by the final df.to_csv call.
fo = r'/home/hase/Documents/ZHAW/InfoEng/Lectures/Scripting/data/submit/titanic3_test_gender.csv'
In [54]:
# Load only the 'id' and 'sex' columns from the semicolon-separated input file
# and use 'id' as the DataFrame index.
df = pd.read_csv(f, sep=';', index_col='id', usecols=['id', 'sex'])
In [55]:
df.head() # Preview the first five rows: 'sex' column, indexed by 'id'
Out[55]:
In [56]:
def gender(row):
    """Map a row's 'sex' value to a 0/1 flag.

    Parameters
    ----------
    row : mapping-like
        Any object indexable by the key ``'sex'``, e.g. the row Series that
        ``DataFrame.apply(..., axis=1)`` passes in, or a plain dict.

    Returns
    -------
    int
        1 if ``row['sex']`` is exactly the string ``'female'``, otherwise 0
        (this includes any other spelling or a missing value).
    """
    # Exact, case-sensitive comparison; everything that is not 'female' maps to 0.
    return 1 if row['sex'] == 'female' else 0
In [57]:
# Fill 'survived' with 1 where 'sex' is exactly 'female' and 0 otherwise.
# The comparison is vectorized over the whole column, so no row-wise apply is
# needed. A lambda was never required either: df.apply(gender, axis=1) accepts
# the function object directly (axis=1 calls it once per row).
df['survived'] = (df['sex'] == 'female').astype(int)
In [58]:
df.head()  # Preview the frame now that the 'survived' column has been added
Out[58]:
In [63]:
# Remove the 'sex' column (axis=1 selects columns rather than rows).
# drop() returns a new frame, so the result is assigned back to df instead of
# using inplace=True, which offers no benefit and prevents method chaining.
df = df.drop('sex', axis=1)
In [64]:
df.head()  # Preview the frame after dropping the 'sex' column
Out[64]:
In [84]:
# Rename the index (loaded from the 'id' column) to 'key'.
df.index.name = 'key'
In [85]:
df.index.name  # Display the index name to confirm the assignment
Out[85]:
In [81]:
# Rename the 'survived' column to 'value'. rename() returns a new frame, so
# the result is assigned back to df rather than mutating it with inplace=True.
df = df.rename(columns={'survived': 'value'})
In [86]:
df.head()  # Preview the final layout before it is written to disk
Out[86]:
In [87]:
# Write the frame to the output path as a semicolon-separated CSV. No index
# argument is passed, so pandas' default applies and the index is written too.
df.to_csv(fo, sep=';')
In [ ]: