notebook.community

Edit and run



In [ ]:

    
import pandas as pd



In [ ]:

    
# pandas was imported under the alias `pd`, which is why the call is pd.read_csv
pd.read_csv(filepath_or_buffer="surveys.csv")



In [ ]:

    
# Keep the parsed CSV under a name so that later cells can reuse it,
# then echo it as this cell's output.
surveys_df = pd.read_csv(filepath_or_buffer="surveys.csv")
surveys_df



In [ ]:

    
# A notebook cell renders only its final expression, so evaluate both lookups
# in a single tuple: type(obj) and obj.__class__ report the same class.
type(surveys_df), surveys_df.__class__



In [ ]:

    
# Show the data type pandas assigned to each column of surveys_df
surveys_df.dtypes



In [ ]:

    
# Look at the column names (.values exposes the column labels as an array
# rather than as a pandas Index object)
surveys_df.columns.values



In [ ]:

    
# Distinct values present in the species_id column
surveys_df['species_id'].unique()



In [ ]:

    
# Summary statistics for the weight column
surveys_df.loc[:, 'weight'].describe()



In [ ]:

    
# A notebook cell renders only its final expression, so five separate calls
# would display nothing but the count. Request every statistic in a single
# aggregation so that they are all shown together.
surveys_df['weight'].agg(['min', 'max', 'mean', 'std', 'count'])



In [ ]:

    
# Split the survey rows into groups keyed on the 'sex' column
grouped_data = surveys_df.groupby(by='sex')



In [ ]:

    
# Summary statistics for all numeric columns by sex. display() (provided by
# IPython/Jupyter) is needed because a cell only renders its last expression
# on its own; without it this table would be computed and thrown away.
display(grouped_data.describe())
# Provide the mean for each numeric column by sex. numeric_only=True skips
# text columns such as species_id, which make mean() raise a TypeError on
# pandas >= 2.0.
grouped_data.mean(numeric_only=True)



In [ ]:

    
# Tally the record_id entries that fall within each species_id group
species_counts = surveys_df.groupby(by='species_id')['record_id'].count()
print(species_counts)



In [ ]:

    
# Number of record_id entries for the species whose species_id is 'DO'
surveys_df.groupby(by='species_id')['record_id'].count().loc['DO']



In [ ]:

    
# Double every weight value; surveys_df itself is left unchanged
surveys_df['weight'].mul(2)



In [ ]:

    
# make sure figures appear inline in Ipython Notebook
%matplotlib inline
# create a quick bar chart
species_counts.plot(kind='bar');



In [ ]:

    
# Number of distinct record_id values within each plot_id group
total_count = surveys_df.groupby(by='plot_id')['record_id'].nunique()
# ...and draw those as a bar chart as well
total_count.plot.bar();



In [ ]: