In [1]:
import pandas as pd
# Load the survey records from surveys.csv (path is relative to the working
# directory) into a DataFrame that the following cells inspect.
surveys_df = pd.read_csv("surveys.csv")

In [2]:
# Check what kind of object read_csv handed back.
type(surveys_df)


Out[2]:
pandas.core.frame.DataFrame

In [3]:
# Look up the dtype of the 'sex' column in the frame's per-column dtype table.
surveys_df.dtypes['sex']


Out[3]:
dtype('O')

In [4]:
# Look up the dtype of the 'record_id' column in the frame's per-column dtype table.
surveys_df.dtypes['record_id']


Out[4]:
dtype('int64')

In [5]:
# Show the dtype of every column in one table.
surveys_df.dtypes


Out[5]:
record_id            int64
month                int64
day                  int64
year                 int64
plot_id              int64
species_id          object
sex                 object
hindfoot_length    float64
weight             float64
dtype: object

In [6]:
# Cast record_id from integer to float64. The result is written back onto
# surveys_df itself, so every later cell sees record_id as a float column.
surveys_df['record_id'] = surveys_df['record_id'].astype(float)
surveys_df.dtypes['record_id']


Out[6]:
dtype('float64')

In [7]:
# Count the rows whose weight is missing (NaN).
int(surveys_df['weight'].isna().sum())


Out[7]:
3266

In [8]:
# How many rows have a recorded weight?
# Count the non-null entries directly: a `weight > 0` comparison would also
# exclude any row whose recorded weight is 0 (or negative), which is not the
# same thing as "has a value".
int(surveys_df['weight'].notna().sum())


Out[8]:
32283

In [9]:
# Build df1 as a separate frame in which every missing weight is replaced by 0;
# surveys_df itself keeps its NaN values.
df1 = surveys_df.assign(weight=surveys_df['weight'].fillna(0))

In [11]:
# Mean of df1's weight column. If df1 still holds the fillna(0) result from the
# previous cell, the 0 placeholders are included in this average, so it is not
# the mean of the weights that were actually recorded.
# NOTE(review): the execution count jumps from 9 to 11 — confirm this value
# reproduces on a fresh kernel (Restart & Run All).
df1['weight'].mean()


Out[11]:
38.751976145601844

In [12]:
# Impute missing weights with the mean of the recorded weights. Both the
# values and the mean come from surveys_df (which still has its NaNs), not
# from df1, so zero placeholders in df1 do not affect the mean.
df1 = df1.assign(
    weight=surveys_df['weight'].fillna(surveys_df['weight'].mean())
)

In [ ]: