In [1]:
import pandas as pd
In [35]:
# Read the animals homework CSV (relative path, resolved against the working directory).
animals_csv = "07-hw-animals.csv"
df = pd.read_csv(animals_csv)
In [36]:
!pip install matplotlib
In [67]:
import matplotlib.pyplot as plt
In [68]:
# Ask IPython to render matplotlib figures inline, in the cell output area.
%matplotlib inline
In [69]:
# Preview the first few rows instead of rendering the entire frame.
df.head()
Out[69]:
In [70]:
# List the column labels of the frame.
df.columns
Out[70]:
In [71]:
# First three rows, restricted to the columns of interest.
columns_to_show = ['animal', 'name', 'length']
df.loc[:, columns_to_show].head(3)
Out[71]:
In [72]:
# Three longest animals: order by the `length` column, largest first, and keep
# the top three rows. Note the method is `sort_values`, not `sort`.
longest_first = df.sort_values('length', ascending=False)
longest_first.iloc[:3]
Out[72]:
In [73]:
# Number of rows for each distinct value in the `animal` column.
animal_counts = df['animal'].value_counts()
animal_counts
Out[73]:
In [74]:
# Boolean mask: True for rows whose `animal` value is 'dog'.
is_dog = df['animal'].eq('dog')
In [75]:
# A comparison such as df['animal'] == 'dog' only produces a Series of True/False
# values; indexing the frame with that mask is what returns the matching rows.
# Reuse the `is_dog` mask built in the previous cell instead of recomputing it.
df[is_dog]
Out[75]:
In [76]:
# Rows whose `length` is strictly greater than 40.
longer_than_40 = df['length'].gt(40)
df[longer_than_40]
Out[76]:
In [77]:
# Derive an `inches` column by dividing `length` by 2.54 (i.e. treating `length`
# as centimetres), then display the updated frame.
df['inches'] = df['length'].div(2.54)
df
Out[77]:
In [78]:
# Reusable boolean masks selecting the cat rows and the dog rows.
cats = df['animal'].eq('cat')
dogs = df['animal'].eq('dog')
In [79]:
# Cats longer than 12 inches. When the two comparisons are written inline they
# must each be wrapped in parentheses, because `&` binds more tightly than
# `==` and `>`; building each mask separately, as here, avoids the issue.
is_cat = df['animal'].eq('cat')
over_12_inches = df['inches'].gt(12)
df[is_cat & over_12_inches]
Out[79]:
In [80]:
# Summary statistics for the cat rows only.
# Recorded result: the mean length of a cat is 37.33333.
df.loc[cats].describe()
Out[80]:
In [81]:
# Summary statistics for the dog rows only.
# Recorded result: the mean `length` of a dog is 50. That figure is in the units
# of the `length` column, which this notebook treats as centimetres
# (inches = length / 2.54) -- it is not 50 inches.
df.loc[dogs].describe()
Out[81]:
In [82]:
# Summarise every kind of animal in a single pass. Grouping by the `animal`
# column labels each group with the animal's name; grouping by the two boolean
# masks (cats, dogs) instead produces True/False index levels that are hard to
# read and lumps any other animal into a single (False, False) group.
df.groupby('animal').describe()
Out[82]:
In [94]:
# Histogram of the `length` values for the dog rows.
new_dog_data = df.loc[df['animal'].eq('dog')]
dog_lengths = new_dog_data['length']
dog_lengths.hist()
Out[94]:
In [95]:
# Switch matplotlib to the 'ggplot' style sheet, then redraw the dog-length histogram.
plt.style.use('ggplot')
dog_lengths_ggplot = new_dog_data['length']
dog_lengths_ggplot.hist()
Out[95]:
In [100]:
# Horizontal bar chart ("barh") of each animal's length, labelled by name.
# To hide the legend, pass legend=False (the boolean, not the string 'false');
# there is no `length=` option on plot().
df.plot.barh(x='name', y='length')
Out[100]:
In [103]:
# Horizontal bar chart of cat lengths, with the cat rows sorted by ascending length.
cat_rows = df.loc[df['animal'].eq('cat')]
cat_rows.sort_values('length').plot.barh(x='name', y='length')
Out[103]:
In [ ]: