1.Import pandas with the right name:


In [12]:
import pandas as pd

2. Set all graphics from matplotlib to display inline


In [13]:
import matplotlib.pyplot as plt

In [14]:
%matplotlib inline

3. Read the csv in (it should be UTF-8 already so you don't have to worry about encoding), save it with the proper boring name


In [19]:
df = pd.read_csv("07-hw-animals.csv")

In [30]:
df


Out[30]:
animal name length
0 cat Anne 35
1 cat Bob 45
2 dog Egglesburg 65
3 dog Devon 50
4 cat Charlie 32
5 dog Fontaine 35

4. Display the names of the columns in the csv


In [20]:
df.columns


Out[20]:
Index(['animal', 'name', 'length'], dtype='object')

In [21]:
df.head()


Out[21]:
animal name length
0 cat Anne 35
1 cat Bob 45
2 dog Egglesburg 65
3 dog Devon 50
4 cat Charlie 32

5. Display the first 3 animals


In [22]:
df['animal'].head(3)


Out[22]:
0    cat
1    cat
2    dog
Name: animal, dtype: object

6. Sort the animals to see the 3 longest animals.


In [26]:
df.sort_values('length', ascending=False).head(3)


Out[26]:
animal name length
2 dog Egglesburg 65
3 dog Devon 50
1 cat Bob 45

7. What are the counts of the different values of the "animal" column? a.k.a. how many cats and how many dogs.


In [29]:
df['animal'].value_counts()


Out[29]:
cat    3
dog    3
Name: animal, dtype: int64

8. Only select the dogs


In [33]:
dogs = df[df['animal'] == "dog"]

In [34]:
dogs


Out[34]:
animal name length
2 dog Egglesburg 65
3 dog Devon 50
5 dog Fontaine 35

9. Display all of the animals that are greater than 40 cm.


In [37]:
animal_larger_40 = df['length'] > 40
animal_larger_40


Out[37]:
0    False
1     True
2     True
3     True
4    False
5    False
Name: length, dtype: bool

In [38]:
df[animal_larger_40]


Out[38]:
animal name length
1 cat Bob 45
2 dog Egglesburg 65
3 dog Devon 50

10. 'length' is the animal's length in cm. Create a new column called inches that is the length in inches.


In [39]:
df['length'].head()


Out[39]:
0    35
1    45
2    65
3    50
4    32
Name: length, dtype: int64

In [42]:
inch = df['length'] * 0.393701
inch


Out[42]:
0    13.779535
1    17.716545
2    25.590565
3    19.685050
4    12.598432
5    13.779535
Name: length, dtype: float64

In [41]:
inch = df['length'] / 2.54
inch


Out[41]:
0    13.779528
1    17.716535
2    25.590551
3    19.685039
4    12.598425
5    13.779528
Name: length, dtype: float64

In [43]:
df['length_inch'] = inch
df.head()


Out[43]:
animal name length length_inch
0 cat Anne 35 13.779535
1 cat Bob 45 17.716545
2 dog Egglesburg 65 25.590565
3 dog Devon 50 19.685050
4 cat Charlie 32 12.598432

11. Save the cats to a separate variable called "cats." Save the dogs to a separate variable called "dogs."


In [44]:
dogs =df[df['animal'] =="dog"]
dogs


Out[44]:
animal name length length_inch
2 dog Egglesburg 65 25.590565
3 dog Devon 50 19.685050
5 dog Fontaine 35 13.779535

In [45]:
cats =df[df['animal'] =="cat"]
cats


Out[45]:
animal name length length_inch
0 cat Anne 35 13.779535
1 cat Bob 45 17.716545
4 cat Charlie 32 12.598432

13. Display all of the animals that are cats and above 12 inches long. First do it using the "cats" variable, then do it using your normal dataframe.


In [48]:
cat = df['animal'] == "cat"
twelve_inch = df['length_inch'] > 12


df[cat & twelve_inch].head()


Out[48]:
animal name length length_inch
0 cat Anne 35 13.779535
1 cat Bob 45 17.716545
4 cat Charlie 32 12.598432

In [49]:
df[(df['animal'] == "cat") & (df['length_inch'] > 12)].head()


Out[49]:
animal name length length_inch
0 cat Anne 35 13.779535
1 cat Bob 45 17.716545
4 cat Charlie 32 12.598432

13. What's the mean length of a cat?


In [59]:
df[cat].describe()


Out[59]:
length length_inch
count 3.000000 3.000000
mean 37.333333 14.698171
std 6.806859 2.679867
min 32.000000 12.598432
25% 33.500000 13.188984
50% 35.000000 13.779535
75% 40.000000 15.748040
max 45.000000 17.716545

14. What's the mean length of a dog


In [63]:
dog = df['animal'] == "dog"
df[dog].describe()


Out[63]:
length length_inch
count 3.0 3.000000
mean 50.0 19.685050
std 15.0 5.905515
min 35.0 13.779535
25% 42.5 16.732292
50% 50.0 19.685050
75% 57.5 22.637808
max 65.0 25.590565

15. Use groupby to accomplish both of the above tasks at once.


In [64]:
df.groupby('animal').describe()


Out[64]:
length length_inch
animal
cat count 3.000000 3.000000
mean 37.333333 14.698171
std 6.806859 2.679867
min 32.000000 12.598432
25% 33.500000 13.188984
50% 35.000000 13.779535
75% 40.000000 15.748040
max 45.000000 17.716545
dog count 3.000000 3.000000
mean 50.000000 19.685050
std 15.000000 5.905515
min 35.000000 13.779535
25% 42.500000 16.732292
50% 50.000000 19.685050
75% 57.500000 22.637808
max 65.000000 25.590565

16. Make a histogram of the length of dogs.


In [68]:
dogs.hist()


Out[68]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x06CEDD30>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x06D0DE30>]], dtype=object)

17 Change your graphing style to be something else (anything else!)


In [69]:
dogs.plot(kind='line')


Out[69]:
<matplotlib.axes._subplots.AxesSubplot at 0x6e685d0>

18. Make a horizontal bar graph of the length of the animals, with their name as the label


In [72]:
df['length'].plot(kind='barh', x='lenth', y='name')


Out[72]:
<matplotlib.axes._subplots.AxesSubplot at 0x6e798b0>

In [73]:
df['length'].plot(kind='barh', labels= df['name'])


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-73-6c8df178f6a4> in <module>()
----> 1 df['length'].plot(kind='barh', labels= df['name'])

c:\users\radhika\appdata\local\programs\python\python35-32\lib\site-packages\pandas\tools\plotting.py in __call__(self, kind, ax, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, label, secondary_y, **kwds)
   3564                            colormap=colormap, table=table, yerr=yerr,
   3565                            xerr=xerr, label=label, secondary_y=secondary_y,
-> 3566                            **kwds)
   3567     __call__.__doc__ = plot_series.__doc__
   3568 

c:\users\radhika\appdata\local\programs\python\python35-32\lib\site-packages\pandas\tools\plotting.py in plot_series(data, kind, ax, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, label, secondary_y, **kwds)
   2643                  yerr=yerr, xerr=xerr,
   2644                  label=label, secondary_y=secondary_y,
-> 2645                  **kwds)
   2646 
   2647 

c:\users\radhika\appdata\local\programs\python\python35-32\lib\site-packages\pandas\tools\plotting.py in _plot(data, x, y, subplots, ax, kind, **kwds)
   2439         plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds)
   2440 
-> 2441     plot_obj.generate()
   2442     plot_obj.draw()
   2443     return plot_obj.result

c:\users\radhika\appdata\local\programs\python\python35-32\lib\site-packages\pandas\tools\plotting.py in generate(self)
   1026         self._compute_plot_data()
   1027         self._setup_subplots()
-> 1028         self._make_plot()
   1029         self._add_table()
   1030         self._make_legend()

c:\users\radhika\appdata\local\programs\python\python35-32\lib\site-packages\pandas\tools\plotting.py in _make_plot(self)
   1969                 rect = self._plot(ax, self.ax_pos + (i + 0.5) * w, y, w,
   1970                                   start=start, label=label,
-> 1971                                   log=self.log, **kwds)
   1972             self._add_legend_handle(rect, label, index=i)
   1973 

c:\users\radhika\appdata\local\programs\python\python35-32\lib\site-packages\pandas\tools\plotting.py in _plot(cls, ax, x, y, w, start, log, **kwds)
   2003     @classmethod
   2004     def _plot(cls, ax, x, y, w, start=0, log=False, **kwds):
-> 2005         return ax.barh(x, y, w, left=start, log=log, **kwds)
   2006 
   2007     def _decorate_ticks(self, ax, name, ticklabels, start_edge, end_edge):

c:\users\radhika\appdata\local\programs\python\python35-32\lib\site-packages\matplotlib\axes\_axes.py in barh(self, bottom, width, height, left, **kwargs)
   2287 
   2288         patches = self.bar(left=left, height=height, width=width,
-> 2289                            bottom=bottom, orientation='horizontal', **kwargs)
   2290         return patches
   2291 

c:\users\radhika\appdata\local\programs\python\python35-32\lib\site-packages\matplotlib\__init__.py in inner(ax, *args, **kwargs)
   1810                     warnings.warn(msg % (label_namer, func.__name__),
   1811                                   RuntimeWarning, stacklevel=2)
-> 1812             return func(ax, *args, **kwargs)
   1813         pre_doc = inner.__doc__
   1814         if pre_doc is None:

c:\users\radhika\appdata\local\programs\python\python35-32\lib\site-packages\matplotlib\axes\_axes.py in bar(self, left, height, width, bottom, **kwargs)
   2129                 label='_nolegend_'
   2130                 )
-> 2131             r.update(kwargs)
   2132             r.get_path()._interpolation_steps = 100
   2133             #print r.get_label(), label, 'label' in kwargs

c:\users\radhika\appdata\local\programs\python\python35-32\lib\site-packages\matplotlib\artist.py in update(self, props)
    854                 func = getattr(self, 'set_' + k, None)
    855                 if func is None or not six.callable(func):
--> 856                     raise AttributeError('Unknown property %s' % k)
    857                 func(v)
    858             changed = True

AttributeError: Unknown property labels

19. Make a sorted horizontal bar graph of the cats, with the larger cats on top.


In [74]:
cats =df[df['animal'] =="cat"]
cats


Out[74]:
animal name length length_inch
0 cat Anne 35 13.779535
1 cat Bob 45 17.716545
4 cat Charlie 32 12.598432

In [76]:
sort_cat = cats.sort_values('length', ascending=False)
sort_cat


Out[76]:
animal name length length_inch
1 cat Bob 45 17.716545
0 cat Anne 35 13.779535
4 cat Charlie 32 12.598432

In [79]:
sort_cat['length'].plot(kind='barh', x='length', y='name')


Out[79]:
<matplotlib.axes._subplots.AxesSubplot at 0x7e82210>

In [ ]: