In [4]:
import pandas as pd

In [5]:
import matplotlib.pyplot as plt
%matplotlib inline

In [7]:
# Load the animal measurements into a DataFrame.
# If a CSV is not UTF-8 encoded, pass the encoding explicitly, for example:
#   df = pd.read_csv("XXXXXXXXXXXXXXXXX.csv", encoding='mac_roman')
df = pd.read_csv(filepath_or_buffer="Animal_Data/07-hw-animals.csv")

In [8]:
# Show the column labels of the loaded frame.
df.columns


Out[8]:
Index(['animal', 'name', 'length'], dtype='object')

In [9]:
# Preview the first three rows.
df.head(n=3)


Out[9]:
animal name length
0 cat Anne 35
1 cat Bob 45
2 dog Egglesburg 65

In [10]:
# The three longest animals: order by 'length' descending, keep the top three.
df.sort_values('length', ascending=False).iloc[:3]


Out[10]:
animal name length
2 dog Egglesburg 65
3 dog Devon 50
1 cat Bob 45

In [11]:
# Count how many rows there are for each distinct value in 'animal'.
df['animal'].value_counts()


Out[11]:
cat    3
dog    3
Name: animal, dtype: int64

In [14]:
# Keep only the dog rows.
# The comparison on its own yields one True/False per row (is this row a dog?);
# indexing the frame with that mask keeps the rows marked True. The two-step
# spelling would be:
#   is_dog = df['animal'] == 'dog'
#   df[is_dog]
df.loc[df['animal'].eq('dog')]


Out[14]:
animal name length
2 dog Egglesburg 65
3 dog Devon 50
5 dog Fontaine 35

In [15]:
# Rows whose 'length' is strictly greater than 40.
df.loc[df['length'].gt(40)]


Out[15]:
animal name length
1 cat Bob 45
2 dog Egglesburg 65
3 dog Devon 50

In [18]:
# del df['feet']  # NOTE(review): left disabled; no 'feet' column exists in the cells shown here

In [19]:
# Add an 'inches' column derived from 'length' with a factor of 0.394.
# NOTE(review): presumably 'length' is in centimetres (1 cm is roughly 0.394 in) - confirm.
df['inches'] = 0.394 * df['length']
# Default preview: the first five rows, now including the new column.
df.head(n=5)


Out[19]:
animal name length inches
0 cat Anne 35 13.790
1 cat Bob 45 17.730
2 dog Egglesburg 65 25.610
3 dog Devon 50 19.700
4 cat Charlie 32 12.608

In [23]:
# Split the frame into one subset per animal type for reuse in later cells.
dogs = df.loc[df['animal'].eq('dog')]
cats = df.loc[df['animal'].eq('cat')]

In [24]:
# Cats whose 'inches' value is strictly greater than 12.
cats.loc[cats['inches'].gt(12)]


Out[24]:
animal name length inches
0 cat Anne 35 13.790
1 cat Bob 45 17.730
4 cat Charlie 32 12.608

In [25]:
# Cats longer than 12 inches, selected straight from the full frame by
# combining two boolean masks with `&`.
#
# Each comparison needs its own parentheses: `&` binds more tightly than the
# comparison operators, so without parentheses Python groups the operands
# around `&` first. For example
#   3 > 2 & 4 > 3       is read as   3 > (2 & 4) > 3
# whereas the intended meaning is
#   (3 > 2) & (4 > 3)   i.e.   True & True   ->   True
#
# Earlier attempts, kept for reference:
#   df[df[df[df['animal'] == 'cat']'inches'] > 12]
#   df[df['animal'] == 'cat'] & df[df['inches'] > 12]
#
# Aside (pseudo-code): steps can be chained one after another, e.g.
#   pd.read_csv('imdb.txt')
#     .sort(columns='year')
#     .filter('year >1990')
#     .to_csv('filtered.csv')
df.loc[(df['inches'] > 12) & (df['animal'] == 'cat')]


Out[25]:
animal name length inches
0 cat Anne 35 13.790
1 cat Bob 45 17.730
4 cat Charlie 32 12.608

In [26]:
# Summary statistics of the numeric columns for the cat rows only.
df.loc[df['animal'].eq('cat')].describe()


Out[26]:
length inches
count 3.000000 3.000000
mean 37.333333 14.709333
std 6.806859 2.681903
min 32.000000 12.608000
25% 33.500000 13.199000
50% 35.000000 13.790000
75% 40.000000 15.760000
max 45.000000 17.730000

In [27]:
# Summary statistics of the numeric columns for the dog rows only.
df.loc[df['animal'].eq('dog')].describe()


Out[27]:
length inches
count 3.0 3.000
mean 50.0 19.700
std 15.0 5.910
min 35.0 13.790
25% 42.5 16.745
50% 50.0 19.700
75% 57.5 22.655
max 65.0 25.610

In [29]:
# Summary statistics of 'inches' for every animal type in one call,
# instead of filtering and describing each type separately.
df.groupby('animal')['inches'].describe()


Out[29]:
animal       
cat     count     3.000000
        mean     14.709333
        std       2.681903
        min      12.608000
        25%      13.199000
        50%      13.790000
        75%      15.760000
        max      17.730000
dog     count     3.000000
        mean     19.700000
        std       5.910000
        min      13.790000
        25%      16.745000
        50%      19.700000
        75%      22.655000
        max      25.610000
Name: inches, dtype: float64

In [30]:
# One histogram per numeric column, restricted to the dog rows.
df.loc[df['animal'].eq('dog')].hist()


Out[30]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x1134ae7b8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x113652a58>]], dtype=object)

In [31]:
# List the style sheets bundled with the installed matplotlib.
# `plt` is the pyplot module imported in the notebook's opening cells, so it is
# not imported a second time here.
plt.style.available


Out[31]:
['seaborn-deep',
 'fivethirtyeight',
 'classic',
 'seaborn-pastel',
 'seaborn-bright',
 'seaborn-white',
 'ggplot',
 'grayscale',
 'seaborn-dark',
 'seaborn-ticks',
 'bmh',
 'seaborn-muted',
 'seaborn-paper',
 'seaborn-dark-palette',
 'seaborn-poster',
 'seaborn-colorblind',
 'seaborn-talk',
 'dark_background',
 'seaborn-whitegrid',
 'seaborn-darkgrid',
 'seaborn-notebook']

In [32]:
# Switch matplotlib to the 'ggplot' style sheet for the plots drawn below.
plt.style.use('ggplot')

In [33]:
# Histogram of the 'inches' values of the dog subset.
dogs.loc[:, 'inches'].hist()


Out[33]:
<matplotlib.axes._subplots.AxesSubplot at 0x1136bda90>

In [35]:
# Vertical bar chart of 'length', one bar per row, labelled by row index.
df['length'].plot.bar()


Out[35]:
<matplotlib.axes._subplots.AxesSubplot at 0x113901160>

In [ ]:
# Alternatively, draw horizontal bars labelled with each animal's name:

In [34]:
# Horizontal bar chart of 'length' with the animal names on the axis; the
# legend is turned off.
df.plot.barh(x='name', y='length', legend=False)


Out[34]:
<matplotlib.axes._subplots.AxesSubplot at 0x1138328d0>

In [36]:
# Cats ordered from shortest to longest (at most three rows kept), then drawn
# as horizontal bars labelled by name, without a legend.
cats_sorted = cats.sort_values('length').iloc[:3]
cats_sorted.plot.barh(x='name', y='length', legend=False)


Out[36]:
<matplotlib.axes._subplots.AxesSubplot at 0x11397b5f8>

In [ ]:
# Alternatively, filter, sort and plot the cats in one chained expression:

In [40]:
# Filter to cats, order by 'length' ascending and plot horizontal bars by
# name, all in a single chain with no intermediate variables.
(
    df.loc[df['animal'].eq('cat')]
    .sort_values('length')
    .plot.barh(x='name', y='length')
)


Out[40]:
<matplotlib.axes._subplots.AxesSubplot at 0x113d6b400>

In [ ]: