In [1]:
import pandas as pd
# load the survey records from the CSV file in the working directory
surveys_df = pd.read_csv(filepath_or_buffer="surveys.csv")
In [2]:
# show the Python type of the object that pd.read_csv returned
type(surveys_df)
Out[2]:
In [3]:
# dtype of the 'sex' column, looked up in the frame's per-column dtype table
surveys_df.dtypes['sex']
Out[3]:
In [4]:
# dtype of the 'record_id' column, looked up in the frame's per-column dtype table
surveys_df.dtypes['record_id']
Out[4]:
In [5]:
# list the dtype of every column in surveys_df
surveys_df.dtypes
Out[5]:
In [6]:
# cast the record_id column (an integer field) to 64-bit float,
# writing the result back into surveys_df
surveys_df['record_id'] = surveys_df['record_id'].astype(dtype='float64')
# display the column's dtype to confirm the conversion
surveys_df.dtypes['record_id']
Out[6]:
In [7]:
# count the rows whose weight is missing (null/NaN)
weight_is_missing = surveys_df['weight'].isnull()
len(surveys_df[weight_is_missing])
Out[7]:
In [8]:
# how many rows have weight values?
# Test for non-null weights directly rather than `weight > 0`: a comparison
# against 0 would also exclude any row whose recorded weight is 0 or below,
# so the count would no longer be the complement of the missing-weight count.
len(surveys_df[surveys_df['weight'].notna()])
Out[8]:
In [9]:
# work on an independent deep copy so surveys_df itself is left untouched
df1 = surveys_df.copy(deep=True)
# substitute 0 for every missing (NaN) weight in the copy
df1['weight'] = df1['weight'].fillna(value=0)
In [11]:
# average of df1's weight column as it currently stands
df1.weight.mean()
Out[11]:
In [12]:
# mean of the weights in the original frame (Series.mean skips NaN by default)
mean_weight = surveys_df['weight'].mean()
# overwrite df1's weight column with the original weights, using that mean
# in place of each missing value
df1['weight'] = surveys_df['weight'].fillna(mean_weight)
In [ ]: