In [ ]:
import pandas as pd
In [ ]:
# pd.read_csv is spelled with the `pd.` prefix because pandas was imported as pd.
# The parsed table is not assigned to a name here, so it is only displayed
# as this cell's output and cannot be reused afterwards.
pd.read_csv("surveys.csv")
In [ ]:
# Read the same CSV again, this time binding the result to a name so that
# later cells can work with it.
surveys_df = pd.read_csv("surveys.csv")
# A bare name on the last line of a notebook cell displays the object.
surveys_df
In [ ]:
# Two equivalent ways of asking an object for its class: the built-in type()
# and the object's own __class__ attribute.
# Both are printed explicitly because a notebook cell only echoes its final
# bare expression; without print() the type() result would be silently dropped
# and the comparison could not be seen.
print(type(surveys_df))
# this does the same thing as the above!
print(surveys_df.__class__)
In [ ]:
# Show the data type pandas assigned to each column of the table.
surveys_df.dtypes
In [ ]:
# Look at the column names (.values exposes the column labels as a plain array)
surveys_df.columns.values
In [ ]:
# Distinct values found in the species_id column, in order of first appearance.
surveys_df['species_id'].unique()
In [ ]:
# One-call statistical summary of the weight column.
surveys_df['weight'].describe()
In [ ]:
# Individual summary statistics for the weight column.
# Each value is printed with a label: a notebook cell only echoes its final
# bare expression, so without print() everything except the count would be
# computed and then thrown away.
print("min:  ", surveys_df['weight'].min())
print("max:  ", surveys_df['weight'].max())
print("mean: ", surveys_df['weight'].mean())
print("std:  ", surveys_df['weight'].std())
print("count:", surveys_df['weight'].count())
In [ ]:
# Group data by sex.
# groupby() only sets up the grouping; statistics are produced when an
# aggregation method is later called on grouped_data. Rows whose 'sex' value
# is missing are left out of the groups by groupby's default behaviour.
grouped_data = surveys_df.groupby('sex')
In [ ]:
# summary statistics for all numeric columns by sex
# Printed explicitly: this is not the cell's last expression, so a notebook
# would otherwise discard the result without showing it.
print(grouped_data.describe())
# provide the mean for each numeric column by sex
# numeric_only=True restricts the mean to numeric columns. From pandas 2.0 on
# the default is False, and averaging the text column species_id raises a
# TypeError; older releases accept the argument too, so this works on both.
grouped_data.mean(numeric_only=True)
In [ ]:
# Tally how many records were logged for each species: select the record_id
# column within every species_id group, then count its non-missing entries.
record_ids_by_species = surveys_df.groupby('species_id')['record_id']
species_counts = record_ids_by_species.count()
print(species_counts)
In [ ]:
# Recompute the per-species record counts, then pick out the single entry
# labelled 'DO'.
records_per_species = surveys_df.groupby('species_id')['record_id'].count()
records_per_species['DO']
In [ ]:
# Double every weight value. The arithmetic is applied element-wise and yields
# a new Series; nothing is assigned, so surveys_df itself is left unchanged.
surveys_df['weight'] * 2
In [ ]:
# Make sure figures appear inline in the IPython/Jupyter notebook
# (this is an IPython magic command, not plain Python syntax).
%matplotlib inline
# Create a quick bar chart of species_counts. The trailing semicolon stops the
# notebook from echoing the plot call's return value alongside the figure.
species_counts.plot(kind='bar');
In [ ]:
# Number of distinct record ids logged in each plot.
record_ids_by_plot = surveys_df.groupby('plot_id')['record_id']
total_count = record_ids_by_plot.nunique()
# Draw the per-plot totals as a bar chart too; the trailing semicolon keeps
# the plot call's return value out of the cell output.
total_count.plot(kind='bar');
In [ ]: