In [2]:
import pandas as pd

In [3]:
df = pd.read_csv('07-hw-animals.csv')

In [4]:
df


Out[4]:
animal name length
0 cat Anne 35
1 cat Bob 45
2 dog Egglesburg 65
3 dog Devon 50
4 cat Charlie 32
5 dog Fontaine 35

In [5]:
!pip install matplotlib


Requirement already satisfied (use --upgrade to upgrade): matplotlib in /Users/skkandrach/.virtualenvs/lede/lib/python3.5/site-packages
Requirement already satisfied (use --upgrade to upgrade): pyparsing!=2.0.0,!=2.0.4,>=1.5.6 in /Users/skkandrach/.virtualenvs/lede/lib/python3.5/site-packages (from matplotlib)
Requirement already satisfied (use --upgrade to upgrade): numpy>=1.6 in /Users/skkandrach/.virtualenvs/lede/lib/python3.5/site-packages (from matplotlib)
Requirement already satisfied (use --upgrade to upgrade): cycler in /Users/skkandrach/.virtualenvs/lede/lib/python3.5/site-packages (from matplotlib)
Requirement already satisfied (use --upgrade to upgrade): python-dateutil in /Users/skkandrach/.virtualenvs/lede/lib/python3.5/site-packages (from matplotlib)
Requirement already satisfied (use --upgrade to upgrade): pytz in /Users/skkandrach/.virtualenvs/lede/lib/python3.5/site-packages (from matplotlib)
Requirement already satisfied (use --upgrade to upgrade): six in /Users/skkandrach/.virtualenvs/lede/lib/python3.5/site-packages (from cycler->matplotlib)

In [6]:
df.columns.values


Out[6]:
array(['animal', 'name', 'length'], dtype=object)

In [7]:
df.head(3)


Out[7]:
animal name length
0 cat Anne 35
1 cat Bob 45
2 dog Egglesburg 65

In [8]:
df.sort_values('length', ascending = False).head(3)


Out[8]:
animal name length
2 dog Egglesburg 65
3 dog Devon 50
1 cat Bob 45

In [9]:
df['animal'].value_counts()


Out[9]:
cat    3
dog    3
Name: animal, dtype: int64

In [10]:
df[(df['animal'] == 'dog')].value_counts()


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-10-7f5c2bb95d67> in <module>()
----> 1 df[(df['animal'] == 'dog')].value_counts()

/Users/skkandrach/.virtualenvs/lede/lib/python3.5/site-packages/pandas/core/generic.py in __getattr__(self, name)
   2670             if name in self._info_axis:
   2671                 return self[name]
-> 2672             return object.__getattribute__(self, name)
   2673 
   2674     def __setattr__(self, name, value):

AttributeError: 'DataFrame' object has no attribute 'value_counts'

In [11]:
df[df['animal'] == 'dog'].head()


Out[11]:
animal name length
2 dog Egglesburg 65
3 dog Devon 50
5 dog Fontaine 35

In [12]:
df[df['length'] > 40]


Out[12]:
animal name length
1 cat Bob 45
2 dog Egglesburg 65
3 dog Devon 50

In [13]:
df['inches'] = df['length'] / 2.54
df.head()


Out[13]:
animal name length inches
0 cat Anne 35 13.779528
1 cat Bob 45 17.716535
2 dog Egglesburg 65 25.590551
3 dog Devon 50 19.685039
4 cat Charlie 32 12.598425

In [14]:
cats = df[df['animal'] == 'cat']
dogs = df[df['animal'] == 'dog']

In [15]:
cats = df[df['animal'] == 'cat']
bigger_than_twelve = df['inches'] < 12
df(cats)


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-15-deeb3a55fcc5> in <module>()
      1 cats = df[df['animal'] == 'cat']
      2 bigger_than_twelve = df['inches'] < 12
----> 3 df(cats)

TypeError: 'DataFrame' object is not callable

In [16]:
df[(df['animal'] == 'cats') & (df['inches'] < 12)].head()


Out[16]:
animal name length inches

In [17]:
df['length'].describe()


Out[17]:
count     6.000000
mean     43.666667
std      12.516656
min      32.000000
25%      35.000000
50%      40.000000
75%      48.750000
max      65.000000
Name: length, dtype: float64

In [18]:
cats['length'].describe()


Out[18]:
count     3.000000
mean     37.333333
std       6.806859
min      32.000000
25%      33.500000
50%      35.000000
75%      40.000000
max      45.000000
Name: length, dtype: float64

In [19]:
dogs['length'].describe()


Out[19]:
count     3.0
mean     50.0
std      15.0
min      35.0
25%      42.5
50%      50.0
75%      57.5
max      65.0
Name: length, dtype: float64

In [20]:
recent.groupby('animal')


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-20-be4a61fcb447> in <module>()
----> 1 recent.groupby('animal')

NameError: name 'recent' is not defined

In [21]:
%matplotlib inline

In [22]:
dogs['length'].hist()


Out[22]:
<matplotlib.axes._subplots.AxesSubplot at 0x108cd6400>

In [23]:
dog.head(5).plot(kind='bar', x='name', y='length')


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-23-eecda305e2fb> in <module>()
----> 1 dog.head(5).plot(kind='bar', x='name', y='length')

NameError: name 'dog' is not defined

In [24]:
df['inches].plot(kind='barh', x='name', y='inches', legend=False)


  File "<ipython-input-24-c687044fd111>", line 1
    df['inches].plot(kind='barh', x='name', y='inches', legend=False)
                              ^
SyntaxError: invalid syntax

In [25]:
cat.head(5).plot(kind='barh', x='name', y='length', legend=False)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-25-d16e2c7fb97e> in <module>()
----> 1 cat.head(5).plot(kind='barh', x='name', y='length', legend=False)

NameError: name 'cat' is not defined

In [26]:
df = pd.read_excel('')


---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-26-8a28a580c8af> in <module>()
----> 1 df = pd.read_excel('')

/Users/skkandrach/.virtualenvs/lede/lib/python3.5/site-packages/pandas/io/excel.py in read_excel(io, sheetname, header, skiprows, skip_footer, index_col, names, parse_cols, parse_dates, date_parser, na_values, thousands, convert_float, has_index_names, converters, engine, squeeze, **kwds)
    168     """
    169     if not isinstance(io, ExcelFile):
--> 170         io = ExcelFile(io, engine=engine)
    171 
    172     return io._parse_excel(

/Users/skkandrach/.virtualenvs/lede/lib/python3.5/site-packages/pandas/io/excel.py in __init__(self, io, **kwds)
    225             self.book = xlrd.open_workbook(file_contents=data)
    226         elif isinstance(io, compat.string_types):
--> 227             self.book = xlrd.open_workbook(io)
    228         else:
    229             raise ValueError('Must explicitly set engine if not passing in'

/Users/skkandrach/.virtualenvs/lede/lib/python3.5/site-packages/xlrd/__init__.py in open_workbook(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows)
    393         peek = file_contents[:peeksz]
    394     else:
--> 395         with open(filename, "rb") as f:
    396             peek = f.read(peeksz)
    397     if peek == b"PK\x03\x04": # a ZIP file

FileNotFoundError: [Errno 2] No such file or directory: ''

In [ ]:


In [ ]:


In [ ]: