In [ ]:
import pandas as pd

In [ ]:
# `pd` is the alias pandas was imported under, which is why the reader is called as pd.read_csv
pd.read_csv(filepath_or_buffer="surveys.csv")

In [ ]:
# Read the CSV and bind the resulting DataFrame to a name so later cells can reuse it
surveys_df = pd.read_csv(filepath_or_buffer="surveys.csv")
# A bare name on the last line makes the notebook render the object
surveys_df

In [ ]:
# A notebook cell only echoes its final expression, so each result is passed to
# display() explicitly; otherwise the first line's value would be silently dropped.
display(type(surveys_df))
# this does the same thing as the above!
display(surveys_df.__class__)

In [ ]:
# Show the dtype of each column in the DataFrame
surveys_df.dtypes

In [ ]:
# Look at the column names: .columns is the DataFrame's column index,
# and .values exposes its labels as an array
surveys_df.columns.values

In [ ]:
# Distinct values found in the species_id column, in order of first appearance
surveys_df['species_id'].unique()

In [ ]:
# Summary statistics for the weight column
surveys_df.loc[:, 'weight'].describe()

In [ ]:
# A notebook cell only echoes its final expression, so five separate calls would
# show just the count and discard the rest. Aggregating in a single call returns
# one Series holding all five statistics, labelled by name.
surveys_df['weight'].agg(['min', 'max', 'mean', 'std', 'count'])

In [ ]:
# Split the survey records into groups keyed on the 'sex' column
grouped_data = surveys_df.groupby(by='sex')

In [ ]:
# Only a cell's last expression is echoed, so the first table is passed to
# display() explicitly to keep it from being silently dropped.
# summary statistics for all numeric columns by sex
display(grouped_data.describe())
# provide the mean for each numeric column by sex; numeric_only=True skips
# non-numeric columns (species_id holds string codes such as 'DO'), which
# pandas >= 2.0 would otherwise reject with a TypeError
grouped_data.mean(numeric_only=True)

In [ ]:
# Records per species: group on species_id, then count the record_id entries in each group
species_counts = (
    surveys_df
    .groupby(by='species_id')['record_id']
    .count()
)
print(species_counts)

In [ ]:
# Pull the record count for the single species code 'DO' out of the per-species tally
surveys_df.groupby(by='species_id')['record_id'].count().loc['DO']

In [ ]:
# Double every weight value; this yields a new Series and leaves surveys_df untouched
surveys_df['weight'].mul(2)

In [ ]:
# make sure figures appear inline in Ipython Notebook
%matplotlib inline
# create a quick bar chart
species_counts.plot(kind='bar');

In [ ]:
# Number of distinct record_id values within each plot_id
total_count = surveys_df.groupby(by='plot_id')['record_id'].nunique()
# Draw those per-plot totals as a bar chart as well
total_count.plot.bar();

In [ ]: