In [107]:

    
import pandas as pd



In [108]:

    
import matplotlib.pyplot as plt



In [109]:

    
%matplotlib inline



In [18]:

    
df = pd.read_csv("07-hw-animals.csv")
print(df)









    



  animal        name  length
0    cat        Anne      35
1    cat         Bob      45
2    dog  Egglesburg      65
3    dog       Devon      50
4    cat     Charlie      32
5    dog    Fontaine      35



In [10]:

    
print(df.columns.values)









    



['animal' 'name' 'length']



In [13]:

    
print(df['animal'])









    



0    cat
1    cat
2    dog
3    dog
4    cat
5    dog
Name: animal, dtype: object



In [19]:

    
print(df[:3])









    



  animal        name  length
0    cat        Anne      35
1    cat         Bob      45
2    dog  Egglesburg      65



In [27]:

    
print(df)









    



  animal        name  length
0    cat        Anne      35
1    cat         Bob      45
2    dog  Egglesburg      65
3    dog       Devon      50
4    cat     Charlie      32
5    dog    Fontaine      35



In [32]:

    
print(df.sort_values(by='length', ascending=0)[:3])









    



  animal        name  length
2    dog  Egglesburg      65
3    dog       Devon      50
1    cat         Bob      45



In [45]:

    
print(df['animal'])









    



0    cat
1    cat
2    dog
3    dog
4    cat
5    dog
Name: animal, dtype: object



In [66]:

    
print(df['animal'].value_counts())









    



cat    3
dog    3
Name: animal, dtype: int64



In [77]:

    
dogs = df[df['animal'] == 'dog']
dogs









    Out[77]:






  
    
      
      animal
      name
      length
    
  
  
    
      2
      dog
      Egglesburg
      65
    
    
      3
      dog
      Devon
      50
    
    
      5
      dog
      Fontaine
      35



In [78]:

    
df[df['length'] > 40]









    Out[78]:






  
    
      
      animal
      name
      length
    
  
  
    
      1
      cat
      Bob
      45
    
    
      2
      dog
      Egglesburg
      65
    
    
      3
      dog
      Devon
      50



In [81]:

    
df['inches'] = df['length'] * .394
df



In [90]:

    
cats = df[df['animal'] == 'cat']
cats



In [91]:

    
dogs = df[df['animal'] == 'dog']
dogs









    Out[91]:






  
    
      
      animal
      name
      length
      inches
    
  
  
    
      2
      dog
      Egglesburg
      65
      25.61
    
    
      3
      dog
      Devon
      50
      19.70
    
    
      5
      dog
      Fontaine
      35
      13.79



In [92]:

    
cats[cats['inches'] > 12]



In [99]:

    
df[df['inches'] > 12]
df[df['animal'] == 'cat']



In [103]:

    
cats['length'].mean()









    Out[103]:





37.333333333333336



In [104]:

    
dogs['length'].mean()









    Out[104]:





50.0



In [105]:

    
df.groupby('animal')['length'].mean()









    Out[105]:





animal
cat    37.333333
dog    50.000000
Name: length, dtype: float64



In [110]:

    
dogs['length'].hist()









    Out[110]:





<matplotlib.axes._subplots.AxesSubplot at 0x6103910>



In [111]:

    
dogs.plot(kind='scatter', x='length', y='inches')









    Out[111]:





<matplotlib.axes._subplots.AxesSubplot at 0x6220cd0>



In [113]:

    
df.plot(kind='barh', x='name', y='length', legend=False)









    Out[113]:





<matplotlib.axes._subplots.AxesSubplot at 0x7223bd0>



In [119]:

    
sortcats = (cats.sort_values(by='length', ascending=0))
sortcats.plot(kind='barh', x='name', y='length', legend=False, sort_columns=False)









    Out[119]:





<matplotlib.axes._subplots.AxesSubplot at 0x730ead0>



In [117]:

    
cats



In [5]:

    
import pandas as pd
df = pd.read_excel("richpeople.xlsx")

What country are most billionaires from? For the top ones, how many billionaires per billion people? Who are the top 10 richest billionaires? What's the average wealth of a billionaire? Male? Female? Who is the poorest billionaire? Who are the top 10 poorest billionaires? 'What is relationship to company'? And what are the most common relationships? Most common source of wealth? Male vs. female? Given the richest person in a country, what % of the GDP is their wealth? Add up the wealth of all of the billionaires in a given country (or a few countries) and then compare it to the GDP of the country, or other billionaires, so like pit the US vs India What are the most common industries for billionaires to come from? What's the total amount of billionaire money from each industry? How many self made billionaires vs. others? How old are billionaires? How old are billionaires self made vs. non self made? or different industries? Who are the youngest billionaires? The oldest? Age distribution - maybe make a graph about it? Maybe just made a graph about how wealthy they are in general? Maybe plot their net worth vs age (scatterplot) Make a bar graph of the top 10 or 20 richest

How many female billionaires are there compared to male? What industries are they from? What is their average wealth?



In [6]:

    
%matplotlib inline



In [7]:

    
print(df['gender'].value_counts())









    



male              2328
female             249
married couple       3
Name: gender, dtype: int64



In [8]:

    
df.groupby('gender')['networthusbillion'].mean()









    Out[8]:





gender
female            3.819277
male              3.516881
married couple    1.300000
Name: networthusbillion, dtype: float64



In [10]:

    
df.groupby('gender')['sourceofwealth'].value_counts()









    Out[10]:





gender  sourceofwealth                   
female  diversified                          9
        real estate                          7
        media                                6
        construction                         5
        consumer goods                       5
        hotels, investments                  5
        Wal-Mart                             4
        casinos                              4
        chemicals                            4
        cleaning products                    4
        Samsung                              3
        banking                              3
        commodities                          3
        mining                               3
        packaging                            3
        pipelines                            3
        retail                               3
        Campbell Soup                        2
        Cargill Inc.                         2
        bank, media                          2
        banking inheritance                  2
        coffee                               2
        financial services                   2
        hotels, restaurants                  2
        inherited, cosmetics                 2
        insurance                            2
        investments                          2
        medical equipment                    2
        paper                                2
        pharmaceuticals                      2
                                            ..
male    telecom, oil service, real estate    1
        telecom, oil, beer                   1
        telecoms                             1
        telecoms/lotteries/insurance         1
        television, Univision                1
        temp agency                          1
        textiles, apparel                    1
        timber/media                         1
        timberland, lumber mills             1
        tobacco                              1
        tobacco distribution, retail         1
        tobacco, banking                     1
        tools                                1
        tourism, construction                1
        tractors                             1
        trading company                      1
        transport                            1
        travel                               1
        vaccines                             1
        vacuums                              1
        venture capital, Google              1
        video cameras                        1
        videogames                           1
        water                                1
        water treatment systems              1
        web hosting                          1
        wind turbines                        1
        wine                                 1
        winter jackets                       1
        wrestling                            1
Name: sourceofwealth, dtype: int64

Let's make a graph 'bout it



In [12]:

    
df.plot(kind='scatter', x='gender', y='networthusbillion')









    



---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
c:\users\kate\appdata\local\programs\python\python35-32\lib\site-packages\pandas\indexes\base.py in get_loc(self, key, method, tolerance)
   1944             try:
-> 1945                 return self._engine.get_loc(key)
   1946             except KeyError:

pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:4066)()

pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:3930)()

pandas\hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:12408)()

pandas\hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:12359)()

KeyError: 'gender'

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
<ipython-input-12-3ef5d449adfa> in <module>()
----> 1 df.plot(kind='scatter', x='gender', y='networthusbillion')

c:\users\kate\appdata\local\programs\python\python35-32\lib\site-packages\pandas\tools\plotting.py in __call__(self, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
   3738                           fontsize=fontsize, colormap=colormap, table=table,
   3739                           yerr=yerr, xerr=xerr, secondary_y=secondary_y,
-> 3740                           sort_columns=sort_columns, **kwds)
   3741     __call__.__doc__ = plot_frame.__doc__
   3742 

c:\users\kate\appdata\local\programs\python\python35-32\lib\site-packages\pandas\tools\plotting.py in plot_frame(data, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
   2612                  yerr=yerr, xerr=xerr,
   2613                  secondary_y=secondary_y, sort_columns=sort_columns,
-> 2614                  **kwds)
   2615 
   2616 

c:\users\kate\appdata\local\programs\python\python35-32\lib\site-packages\pandas\tools\plotting.py in _plot(data, x, y, subplots, ax, kind, **kwds)
   2439         plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds)
   2440 
-> 2441     plot_obj.generate()
   2442     plot_obj.draw()
   2443     return plot_obj.result

c:\users\kate\appdata\local\programs\python\python35-32\lib\site-packages\pandas\tools\plotting.py in generate(self)
   1026         self._compute_plot_data()
   1027         self._setup_subplots()
-> 1028         self._make_plot()
   1029         self._add_table()
   1030         self._make_legend()

c:\users\kate\appdata\local\programs\python\python35-32\lib\site-packages\pandas\tools\plotting.py in _make_plot(self)
   1598         else:
   1599             label = None
-> 1600         scatter = ax.scatter(data[x].values, data[y].values, c=c_values,
   1601                              label=label, cmap=cmap, **self.kwds)
   1602         if cb:

c:\users\kate\appdata\local\programs\python\python35-32\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
   1995             return self._getitem_multilevel(key)
   1996         else:
-> 1997             return self._getitem_column(key)
   1998 
   1999     def _getitem_column(self, key):

c:\users\kate\appdata\local\programs\python\python35-32\lib\site-packages\pandas\core\frame.py in _getitem_column(self, key)
   2002         # get column
   2003         if self.columns.is_unique:
-> 2004             return self._get_item_cache(key)
   2005 
   2006         # duplicate columns & possible reduce dimensionality

c:\users\kate\appdata\local\programs\python\python35-32\lib\site-packages\pandas\core\generic.py in _get_item_cache(self, item)
   1348         res = cache.get(item)
   1349         if res is None:
-> 1350             values = self._data.get(item)
   1351             res = self._box_item_values(item, values)
   1352             cache[item] = res

c:\users\kate\appdata\local\programs\python\python35-32\lib\site-packages\pandas\core\internals.py in get(self, item, fastpath)
   3288 
   3289             if not isnull(item):
-> 3290                 loc = self.items.get_loc(item)
   3291             else:
   3292                 indexer = np.arange(len(self.items))[isnull(self.items)]

c:\users\kate\appdata\local\programs\python\python35-32\lib\site-packages\pandas\indexes\base.py in get_loc(self, key, method, tolerance)
   1945                 return self._engine.get_loc(key)
   1946             except KeyError:
-> 1947                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   1948 
   1949         indexer = self.get_indexer([key], method=method, tolerance=tolerance)

pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:4066)()

pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:3930)()

pandas\hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:12408)()

pandas\hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:12359)()

KeyError: 'gender'



In [ ]:

	animal	name	length	inches
0	cat	Anne	35	13.790
1	cat	Bob	45	17.730
2	dog	Egglesburg	65	25.610
3	dog	Devon	50	19.700
4	cat	Charlie	32	12.608
5	dog	Fontaine	35	13.790