In [1]:
import pandas as pd
# Read the survey records from the CSV file into a DataFrame. The path is
# relative, so it is resolved against the kernel's current working directory.
surveys_df = pd.read_csv("data/surveys.csv")

In [2]:
# Confirm what kind of object read_csv handed back.
type(surveys_df)


Out[2]:
pandas.core.frame.DataFrame

In [3]:
# Inspect the dtype of a single column (selecting one column yields a Series).
surveys_df['record_id'].dtype


Out[3]:
dtype('int64')

In [4]:
# Show the dtype of every column at once; no dtypes were passed to read_csv,
# so these are the types pandas inferred from the file contents.
surveys_df.dtypes


Out[4]:
record_id            int64
month                int64
day                  int64
year                 int64
plot_id              int64
species_id          object
sex                 object
hindfoot_length    float64
weight             float64
dtype: object

In [5]:
# Re-cast the record_id column to 64-bit floats. This overwrites the column
# on surveys_df itself, so dtype listings displayed by earlier cells no longer
# reflect the frame's current state.
surveys_df['record_id'] = surveys_df['record_id'].astype(dtype='float64')
# Look the column up in the per-column dtype listing to confirm the cast.
surveys_df.dtypes['record_id']


Out[5]:
dtype('float64')

In [6]:
# Average weight across all records. Series.mean skips NaN by default, so
# rows with a missing weight do not contribute to this value.
surveys_df['weight'].mean()


Out[6]:
42.672428212991356

In [7]:
# Number of records whose weight is missing: summing a boolean mask counts
# its True entries.
print(surveys_df['weight'].isnull().sum())
# Number of records with a strictly positive weight. Comparisons against NaN
# evaluate to False, so missing weights are not counted here.
print((surveys_df['weight'] > 0).sum())


3266
32283

In [8]:
# Build a separate frame in which missing weights are replaced by 0.
# assign() returns a new DataFrame, so surveys_df keeps its NaN values.
df1 = surveys_df.assign(weight=surveys_df['weight'].fillna(0))

In [9]:
# Average weight after the zero fill. The former NaN rows now count as 0 in
# the average instead of being skipped, which pulls the mean below the one
# computed on surveys_df['weight'].
df1['weight'].mean()


Out[9]:
38.751976145601844

In [10]:
# Alternative imputation: replace df1's weight column with the original
# column, filling its gaps with the column mean instead of 0. The source is
# surveys_df rather than df1 because df1['weight'] no longer contains any NaN
# after the earlier zero fill.
df1['weight'] = surveys_df['weight'].fillna(surveys_df['weight'].mean())