In [5]:
import matplotlib.pyplot as plt
import pandas as pd

In [11]:
# Select the inline matplotlib backend so figures render in the notebook output.
%matplotlib inline

In [17]:
# Load the animals dataset from its CSV file into `df`.
# If the file needed an explicit encoding, the call would look something like:
#   df = pd.read_csv("XXXXXXXXXXXXXXXXX.csv", encoding='mac_roman')
animals_csv = "Animal_Data/07-hw-animals.csv"
df = pd.read_csv(animals_csv)

In [20]:
# Show the column labels of the loaded frame.
df.columns


Out[20]:
Index(['animal', 'name', 'length'], dtype='object')

In [21]:
# Preview the first three rows of the frame.
df.head(n=3)


Out[21]:
animal name length
0 cat Anne 35
1 cat Bob 45
2 dog Egglesburg 65

In [25]:
# Order rows from largest to smallest 'length', then show the top three.
longest_first = df.sort_values('length', ascending=False)
longest_first.head(3)


Out[25]:
animal name length
2 dog Egglesburg 65
3 dog Devon 50
1 cat Bob 45

In [26]:
# Count how many rows there are for each value in the 'animal' column.
animal_kinds = df['animal']
animal_kinds.value_counts()


Out[26]:
dog    3
cat    3
Name: animal, dtype: int64

In [28]:
# Keep only the rows whose 'animal' value is 'dog'.
is_dog = df['animal'].eq('dog')
df[is_dog]


Out[28]:
animal name length
2 dog Egglesburg 65
3 dog Devon 50
5 dog Fontaine 35

In [78]:
# Keep only the rows whose 'length' is strictly greater than 40.
longer_than_40 = df['length'].gt(40)
df[longer_than_40]


Out[78]:
animal name length feet inches
1 cat Bob 45 17.73 17.73
2 dog Egglesburg 65 25.61 25.61
3 dog Devon 50 19.70 19.70

In [84]:
# Remove the 'feet' column if it is present.
# No cell in this notebook creates 'feet', so a plain `del df['feet']` raises
# KeyError on a fresh kernel (and on any second run). errors='ignore' makes the
# cell idempotent: it drops the column when it exists and is a no-op otherwise.
df = df.drop(columns=['feet'], errors='ignore')

In [85]:
# Derive an 'inches' column by scaling 'length' by a fixed factor.
# NOTE(review): 0.394 looks like a rounded cm -> inch factor, which presumes
# 'length' is in centimetres -- confirm against the data source.
inches_per_length_unit = 0.394
df['inches'] = df['length'].mul(inches_per_length_unit)
df.head()


Out[85]:
animal name length inches
0 cat Anne 35 13.790
1 cat Bob 45 17.730
2 dog Egglesburg 65 25.610
3 dog Devon 50 19.700
4 cat Charlie 32 12.608

In [86]:
# Split the frame into one subset per animal kind for use in later cells.
animal_kind = df['animal']
dogs = df[animal_kind.eq('dog')]
cats = df[animal_kind.eq('cat')]

In [87]:
# From the cat subset, keep the rows whose 'inches' value exceeds 12.
over_12_inches = cats['inches'].gt(12)
cats[over_12_inches]


Out[87]:
animal name length inches
0 cat Anne 35 13.790
1 cat Bob 45 17.730
4 cat Charlie 32 12.608

In [129]:
# Select cats longer than 12 inches straight from `df` using one combined mask.
#
# Why each comparison must be isolated before combining with `&`:
# in Python, `&` binds more tightly than `==` and `>`, so the unparenthesised
#     df['animal'] == 'cat' & df['inches'] > 12
# is parsed as the chained comparison
#     df['animal'] == ('cat' & df['inches']) > 12
# which is not the intended filter. The same holds for plain numbers:
# `3 > 2 & 4 > 3` means `3 > (2 & 4) > 3`, i.e. False, whereas
# `(3 > 2) & (4 > 3)` is True.
is_cat = df['animal'] == 'cat'
is_over_12_inches = df['inches'] > 12
df[is_cat & is_over_12_inches]


Out[129]:
animal name length inches
0 cat Anne 35 13.790
1 cat Bob 45 17.730
4 cat Charlie 32 12.608

In [93]:
# Summary statistics computed over the cat rows only.
cat_rows = df[df['animal'].eq('cat')]
cat_rows.describe()


Out[93]:
length inches
count 3.000000 3.000000
mean 37.333333 14.709333
std 6.806859 2.681903
min 32.000000 12.608000
25% 33.500000 13.199000
50% 35.000000 13.790000
75% 40.000000 15.760000
max 45.000000 17.730000

In [94]:
# Summary statistics computed over the dog rows only.
dog_rows = df[df['animal'].eq('dog')]
dog_rows.describe()


Out[94]:
length inches
count 3.0 3.000
mean 50.0 19.700
std 15.0 5.910
min 35.0 13.790
25% 42.5 16.745
50% 50.0 19.700
75% 57.5 22.655
max 65.0 25.610

In [95]:
# Average 'inches' value for each kind of animal.
inches_by_animal = df.groupby(['animal'])['inches']
inches_by_animal.mean()


Out[95]:
animal
cat    14.709333
dog    19.700000
Name: inches, dtype: float64

In [96]:
# Draw histograms for the dog rows (DataFrame.hist yields one subplot per plotted column).
dog_rows = df[df['animal'].eq('dog')]
dog_rows.hist()


Out[96]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x10dbe7ac8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10dc3f860>]], dtype=object)

In [97]:
# List the names of the matplotlib style sheets available in this environment,
# so one of them can be chosen with plt.style.use(...).
plt.style.available


Out[97]:
['seaborn-darkgrid',
 'seaborn-dark',
 'seaborn-deep',
 'ggplot',
 'seaborn-whitegrid',
 'seaborn-notebook',
 'seaborn-dark-palette',
 'grayscale',
 'dark_background',
 'bmh',
 'fivethirtyeight',
 'seaborn-poster',
 'seaborn-paper',
 'seaborn-colorblind',
 'seaborn-white',
 'seaborn-ticks',
 'seaborn-bright',
 'seaborn-muted',
 'classic',
 'seaborn-talk',
 'seaborn-pastel']

In [110]:
# Apply the 'ggplot' style sheet (one of the names in plt.style.available).
plt.style.use('ggplot')

In [111]:
# Histogram of the 'inches' values in the dog subset.
dog_inches = dogs['inches']
dog_inches.hist()


Out[111]:
<matplotlib.axes._subplots.AxesSubplot at 0x10e9924a8>

In [142]:
# Horizontal bar chart of 'length' against 'name' for every row, without a legend.
bar_options = dict(kind='barh', x='name', y='length', legend=False)
df.plot(**bar_options)


Out[142]:
<matplotlib.axes._subplots.AxesSubplot at 0x10fe1d9e8>

In [146]:
# Sort the cat subset by ascending 'length', keep the first three rows,
# and chart them as horizontal bars labelled by 'name'.
cats_sorted = cats.sort_values('length').head(3)
cats_sorted.plot(x='name', y='length', kind='barh', legend=False)


Out[146]:
<matplotlib.axes._subplots.AxesSubplot at 0x1101df940>

In [ ]: