notebook.community

Edit and run



In [1]:

    
import pandas as pd
# Load the survey records from the CSV file into a DataFrame.
surveys_path = "surveys.csv"
surveys_df = pd.read_csv(surveys_path)



In [2]:

    
# Show the Python type of the object bound to surveys_df.
type(surveys_df)









    Out[2]:





pandas.core.frame.DataFrame



In [3]:

    
# Show the dtype pandas assigned to the 'sex' column.
surveys_df.dtypes['sex']









    Out[3]:





dtype('O')



In [4]:

    
# Show the dtype pandas assigned to the 'record_id' column.
surveys_df.dtypes['record_id']









    Out[4]:





dtype('int64')



In [5]:

    
# List the dtype of every column in the frame, one row per column.
surveys_df.dtypes









    Out[5]:





record_id            int64
month                int64
day                  int64
year                 int64
plot_id              int64
species_id          object
sex                 object
hindfoot_length    float64
weight             float64
dtype: object



In [6]:

    
# Cast record_id to floating point, store it back on the frame,
# then display the column's dtype to confirm the conversion.
record_id_as_float = surveys_df['record_id'].astype('float64')
surveys_df['record_id'] = record_id_as_float
surveys_df['record_id'].dtype









    Out[6]:





dtype('float64')



In [7]:

    
# Count the rows whose weight is missing (NaN).
missing_weight = surveys_df['weight'].isna()
len(surveys_df[missing_weight])









    Out[7]:





3266



In [8]:

    
# How many rows have weight values?
# Test for "not missing" directly rather than "weight > 0": a comparison
# against 0 would also exclude any recorded weight that is zero or negative,
# which is a different question from "does this row have a value".
has_weight = surveys_df['weight'].notna()
len(surveys_df[has_weight])









    Out[8]:





32283



In [9]:

    
# Build the zero-filled weight column first, then attach it to a copy of the
# frame so that surveys_df itself keeps its NaN weights.
weight_zero_filled = surveys_df['weight'].fillna(0)
df1 = surveys_df.copy()
df1['weight'] = weight_zero_filled



In [11]:

    
# Mean of df1's weight column. If df1 still holds the NaN -> 0 fill from the
# earlier cell, those zeros are included in the average and pull it down.
# NOTE(review): the execution counter skips a cell before this one — confirm
# df1 was not modified in between.
df1['weight'].mean()









    Out[11]:





38.751976145601844



In [12]:

    
# Replace missing weights with the mean of the recorded weights
# (Series.mean() skips NaN by default), overwriting df1's weight column.
mean_weight = surveys_df['weight'].mean()
df1['weight'] = surveys_df['weight'].fillna(mean_weight)



In [ ]: