In [4]:
import pandas as pd
In [5]:
import matplotlib.pyplot as plt
%matplotlib inline
In [7]:
# If the CSV is not UTF-8, name its encoding explicitly; such a call would look like:
#   df = pd.read_csv("XXXXXXXXXXXXXXXXX.csv", encoding='mac_roman')
ANIMALS_CSV = "Animal_Data/07-hw-animals.csv"
df = pd.read_csv(ANIMALS_CSV)
In [8]:
# Show the column labels of the freshly loaded frame.
df.columns
Out[8]:
In [9]:
# Preview the first three rows.
df.head(3)
Out[9]:
In [10]:
# Order every row from largest to smallest 'length', then keep the top three.
by_length_desc = df.sort_values(by='length', ascending=False)
by_length_desc.head(3)
Out[10]:
In [11]:
# Count how many rows there are for each distinct value in the 'animal' column.
df['animal'].value_counts()
Out[11]:
In [14]:
# Comparing the column with 'dog' yields one True/False flag per row
# (it only tests whether each row is a dog); indexing df with those
# flags keeps just the rows flagged True.
is_dog = df['animal'] == 'dog'
df[is_dog]
Out[14]:
In [15]:
# Rows whose 'length' value is greater than 40.
df.loc[df['length'] > 40]
Out[15]:
In [18]:
# (not run) del df['feet']  -- would remove the 'feet' column from df in place
In [19]:
# Derive an 'inches' column from 'length' and preview the result.
# 0.394 is roughly the number of inches in one centimetre, so this presumably
# assumes 'length' is recorded in centimetres -- TODO confirm against the data.
LENGTH_TO_INCHES = 0.394
df['inches'] = LENGTH_TO_INCHES * df['length']
df.head()
Out[19]:
In [23]:
# Keep one sub-frame per animal type so later cells can reuse them.
species = df['animal']
dogs = df[species == 'dog']
cats = df[species == 'cat']
In [24]:
# Cats longer than 12 inches (one foot).
over_a_foot = cats['inches'] > 12
cats[over_a_foot]
Out[24]:
In [25]:
# Goal: select, in one expression, the rows that are cats AND longer than 12 inches.
#
# Earlier attempts, kept for reference:
#   df[df[df[df['animal'] == 'cat']'inches'] > 12]
#   df[df['animal'] == 'cat']&
#   df[df['inches'] > 12]
#
# Aside -- an illustrative sketch of the method-chaining style (not meant to run as written):
#   pd.read_csv('imdb.txt')
#     .sort(columns='year')
#     .filter('year >1990')
#     .to_csv('filtered.csv')
#
# Working version: build one boolean mask per condition and combine them with `&`.
df[(df['animal'] == 'cat') & (df['inches'] > 12)]
# Why each comparison needs its own parentheses: `&` binds more tightly than
# `>` and `==`. The intended reading of
#   3 > 2 & 4 > 3
# is
#   (3 > 2) & (4 > 3)   ->   True & True   ->   True
# but without the parentheses Python evaluates `2 & 4` first, i.e. it reads
#   3 > (2 & 4) > 3
# so always write the parenthesised form.
Out[25]:
In [26]:
# Summary statistics for the rows whose 'animal' is 'cat'.
df[df['animal'].eq('cat')].describe()
Out[26]:
In [27]:
# Summary statistics for the rows whose 'animal' is 'dog'.
df[df['animal'].eq('dog')].describe()
Out[27]:
In [29]:
# Summary statistics of 'inches', computed separately for every animal type in one go.
inches_by_animal = df.groupby(['animal'])['inches']
inches_by_animal.describe()
Out[29]:
In [30]:
# Histograms of the dog rows (DataFrame.hist draws one panel per plottable column).
dog_rows = df[df['animal'] == 'dog']
dog_rows.hist()
Out[30]:
In [31]:
# List the names of the plotting styles matplotlib knows about.
# `plt` is the matplotlib.pyplot alias already imported in the notebook's
# import cell near the top, so it is not re-imported here.
plt.style.available
Out[31]:
In [32]:
# Switch matplotlib to the 'ggplot' style.
# NOTE(review): this cell sits after the first histogram cell, so only figures
# created from here on use the style; consider moving it next to the imports.
plt.style.use('ggplot')
In [33]:
# Distribution of the 'inches' values among the dogs.
dog_inches = dogs['inches']
dog_inches.hist()
Out[33]:
In [35]:
# Vertical bar chart: one bar per row, bar height taken from 'length'.
df['length'].plot.bar()
Out[35]:
In [ ]:
#or, as horizontal bars labelled with each row's 'name':
In [34]:
# Horizontal bar chart of 'length', one bar per row labelled by 'name', legend hidden.
df.plot.barh(x='name', y='length', legend=False)
Out[34]:
In [36]:
# Take the three cats with the smallest 'length' and draw them as horizontal
# bars labelled by name.
cats_sorted = cats.sort_values(by='length').head(3)  # ascending is the default order
cats_sorted.plot.barh(x='name', y='length', legend=False)
Out[36]:
In [ ]:
#or, as one chained expression (unlike the cell above, this plots every cat and keeps the legend):
In [40]:
# Filter to cats, order them by ascending 'length', and plot horizontal bars
# labelled by name -- written as one chain, one step per line.
(
    df.loc[df['animal'] == 'cat']
    .sort_values(by='length')
    .plot.barh(x='name', y='length')
)
Out[40]:
In [ ]: