In [5]:
import pandas as pd
In [11]:
%matplotlib inline
In [17]:
# For a file saved in a non-default encoding, the call would look something like this:
#   df = pd.read_csv("XXXXXXXXXXXXXXXXX.csv", encoding='mac_roman')
df = pd.read_csv("Animal_Data/07-hw-animals.csv")
In [20]:
# List the column labels of the loaded frame.
df.columns
Out[20]:
In [21]:
# Preview the first three rows.
df.head(3)
Out[21]:
In [25]:
# Order rows by 'length' from largest to smallest and keep the first three.
df.sort_values('length', ascending=False).head(3)
Out[25]:
In [26]:
# Count how many rows there are for each distinct value in 'animal'.
df['animal'].value_counts()
Out[26]:
In [28]:
# Only the rows whose 'animal' value is exactly 'dog'.
df.loc[df['animal'].eq('dog')]
Out[28]:
In [78]:
# Only the rows whose 'length' is strictly greater than 40.
df.loc[df['length'].gt(40)]
Out[78]:
In [84]:
# Remove the 'feet' column. Using drop(..., errors='ignore') keeps this cell
# safe to re-run: `del df['feet']` raises KeyError once the column is already
# gone (or if the loaded CSV never contained it).
df = df.drop(columns=['feet'], errors='ignore')
In [85]:
# Add an 'inches' column holding 'length' scaled by 0.394
# (presumably a cm -> inch factor; confirm the units of 'length').
df['inches'] = df['length'].mul(0.394)
df.head()
Out[85]:
In [86]:
# Per-animal subsets of df, reused by later cells.
dogs = df.loc[df['animal'].eq('dog')]
cats = df.loc[df['animal'].eq('cat')]
In [87]:
# Rows of the cats subset whose 'inches' value is strictly greater than 12.
cats.loc[cats['inches'].gt(12)]
Out[87]:
In [129]:
# Rows where 'animal' is 'cat' AND 'inches' exceeds 12, selected straight from
# df with two conditions combined by `&`.
#
# Each comparison must be wrapped in parentheses: `&` binds more tightly than
# `>` and `==`, so without them the expression is grouped incorrectly.
# With plain numbers:
#   3 > 2 & 4 > 3        is read as 3 > (2 & 4) > 3, not as "True & True"
#   (3 > 2) & (4 > 3)    is the intended form and evaluates to True
#
# An earlier attempt that did not work (invalid nesting):
#   df[df[df[df['animal'] == 'cat']'inches'] > 12]
df[(df['animal'] == 'cat') & (df['inches'] > 12)]
Out[129]:
In [93]:
# Summary statistics of the numeric columns, restricted to the cat rows.
df.loc[df['animal'].eq('cat')].describe()
Out[93]:
In [94]:
# Summary statistics of the numeric columns, restricted to the dog rows.
df.loc[df['animal'].eq('dog')].describe()
Out[94]:
In [95]:
# Average 'inches' within each 'animal' group.
df.groupby('animal')['inches'].mean()
Out[95]:
In [96]:
# One histogram per numeric column, using only the dog rows.
df.loc[df['animal'].eq('dog')].hist()
Out[96]:
In [97]:
# NOTE(review): this import sits mid-notebook; imports normally belong in the
# first cell next to `import pandas as pd`.
import matplotlib.pyplot as plt
# Show the names of the matplotlib style sheets that plt.style.use accepts.
plt.style.available
Out[97]:
In [110]:
# Apply the 'ggplot' style sheet to figures drawn from here on.
plt.style.use('ggplot')
In [111]:
# Histogram of the 'inches' column for the dog rows only.
dogs['inches'].hist()
Out[111]:
In [142]:
# Horizontal bar chart: one bar per row, labelled by 'name', sized by 'length'.
df.plot.barh(x='name', y='length', legend=False)
Out[142]:
In [146]:
# Sort the cats by 'length' (smallest first), keep the first three rows, and
# draw them as horizontal bars labelled by 'name'.
# NOTE(review): head(3) limits the chart to the three shortest cats — confirm
# that truncating (rather than plotting every cat) is intended.
cats_sorted = cats.sort_values('length').head(3)
cats_sorted.plot.barh(x='name', y='length', legend=False)
Out[146]:
In [ ]: