In [3]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
!pip3 install xlrd


Requirement already satisfied (use --upgrade to upgrade): xlrd in /Users/sz2472/.virtualenvs/pandas/lib/python3.5/site-packages

In [5]:
# Load the billionaire workbook into a DataFrame; the path is relative to the
# notebook's working directory.
workbook_path = "richpeople.xlsx"
df = pd.read_excel(workbook_path)

What country are most billionaires from? For the top ones, how many billionaires per billion people?


In [14]:
# Keep only the rows for the year 2014; `recent` is the frame the cells below
# work from.
is_2014 = df['year'] == 2014
recent = df[is_2014]
recent.head()


Out[14]:
year name rank citizenship countrycode networthusbillion selfmade typeofwealth gender age ... relationshiptocompany foundingdate gdpcurrentus sourceofwealth notes notes2 source source_2 source_3 source_4
1 2014 A. Jerrold Perenchio 663 United States USA 2.6 self-made executive male 83.0 ... former chairman and CEO 1955.0 NaN television, Univision represented Marlon Brando and Elizabeth Taylor NaN http://en.wikipedia.org/wiki/Jerry_Perenchio http://www.forbes.com/profile/a-jerrold-perenc... COLUMN ONE; A Hollywood Player Who Owns the Ga... NaN
5 2014 Abdulla Al Futtaim 687 United Arab Emirates ARE 2.5 inherited inherited male NaN ... relation 1930.0 NaN auto dealers, investments company split between him and cousin in 2000 NaN http://en.wikipedia.org/wiki/Al-Futtaim_Group http://www.al-futtaim.ae/content/groupProfile.asp NaN NaN
6 2014 Abdulla bin Ahmad Al Ghurair 305 United Arab Emirates ARE 4.8 inherited inherited male NaN ... relation 1960.0 NaN diversified inherited from father NaN http://en.wikipedia.org/wiki/Al-Ghurair_Group http://www.alghurair.com/about-us/our-history NaN NaN
8 2014 Abdullah Al Rajhi 731 Saudi Arabia SAU 2.4 self-made self-made finance male NaN ... founder 1957.0 NaN banking NaN NaN http://en.wikipedia.org/wiki/Al-Rajhi_Bank http://www.alrajhibank.com.sa/ar/investor-rela... http://www.alrajhibank.com.sa/ar/about-us/page... NaN
9 2014 Abdulsamad Rabiu 1372 Nigeria NGA 1.2 self-made founder non-finance male 54.0 ... founder 1988.0 NaN sugar, flour, cement NaN NaN http://www.forbes.com/profile/abdulsamad-rabiu/ http://www.bloomberg.com/research/stocks/priva... NaN NaN

5 rows × 30 columns


In [8]:
# List every column name available in the 2014 frame.
column_names = recent.columns.values
column_names


Out[8]:
array(['year', 'name', 'rank', 'citizenship', 'countrycode',
       'networthusbillion', 'selfmade', 'typeofwealth', 'gender', 'age',
       'industry', 'IndustryAggregates', 'region', 'north',
       'politicalconnection', 'founder', 'generationofinheritance',
       'sector', 'company', 'companytype', 'relationshiptocompany',
       'foundingdate', 'gdpcurrentus', 'sourceofwealth', 'notes', 'notes2',
       'source', 'source_2', 'source_3', 'source_4'], dtype=object)

Where are all the billionaires from?


In [9]:
# value_counts() tallies how many times each country code appears,
# i.e. the number of 2014 billionaires per country.
country_counts = recent['countrycode'].value_counts()
country_counts


Out[9]:
USA       499
CHN       152
RUS       111
DEU        85
BRA        65
IND        56
GBR        47
HKG        45
FRA        43
ITA        35
CAN        32
AUS        29
Taiwan     28
JPN        27
KOR        27
ESP        26
TUR        24
CHE        22
SWE        19
IDN        19
ISR        18
MEX        16
SGP        16
MYS        13
CHL        12
THA        11
PHL        10
AUT        10
NOR         9
UKR         9
         ... 
ARG         5
FIN         5
KAZ         5
POL         5
KWT         5
NGA         4
MAR         4
CYP         4
COL         4
ARE         4
GRC         3
PRT         3
VEN         3
MCO         3
BEL         3
OMN         2
NZL         2
MAC         2
LTU         1
AGO         1
GGY         1
UGA         1
ROU         1
VNM         1
SWZ         1
GEO         1
NPL         1
DZA         1
KNA         1
TZA         1
Name: countrycode, dtype: int64

In [10]:
# sort_values() reorders the rows based on the `by` column; sorting net worth
# in descending order puts the richest people first, then show the top ten.
by_net_worth = recent.sort_values(by='networthusbillion', ascending=False)
by_net_worth.head(10)


Out[10]:
year name rank citizenship countrycode networthusbillion selfmade typeofwealth gender age ... relationshiptocompany foundingdate gdpcurrentus sourceofwealth notes notes2 source source_2 source_3 source_4
284 2014 Bill Gates 1 United States USA 76.0 self-made founder non-finance male 58.0 ... founder 1975.0 NaN Microsoft NaN NaN http://www.forbes.com/profile/bill-gates/ NaN NaN NaN
348 2014 Carlos Slim Helu 2 Mexico MEX 72.0 self-made privatized and resources male 74.0 ... founder 1990.0 NaN telecom NaN NaN http://www.ozy.com/provocateurs/carlos-slims-w... NaN NaN NaN
124 2014 Amancio Ortega 3 Spain ESP 64.0 self-made founder non-finance male 77.0 ... founder 1975.0 NaN retail NaN NaN http://www.forbes.com/profile/amancio-ortega/ NaN NaN NaN
2491 2014 Warren Buffett 4 United States USA 58.2 self-made founder non-finance male 83.0 ... founder 1839.0 NaN Berkshire Hathaway NaN NaN http://www.forbes.com/lists/2009/10/billionair... http://www.forbes.com/companies/berkshire-hath... NaN NaN
1377 2014 Larry Ellison 5 United States USA 48.0 self-made founder non-finance male 69.0 ... founder 1977.0 NaN Oracle NaN NaN http://www.forbes.com/profile/larry-ellison/ http://www.businessinsider.com/how-larry-ellis... NaN NaN
509 2014 David Koch 6 United States USA 40.0 inherited inherited male 73.0 ... relation 1940.0 NaN diversified inherited from father NaN http://www.kochind.com/About_Koch/History_Time... NaN NaN NaN
381 2014 Charles Koch 6 United States USA 40.0 inherited inherited male 78.0 ... relation 1940.0 NaN diversified inherited from father NaN http://www.kochind.com/About_Koch/History_Time... NaN NaN NaN
2185 2014 Sheldon Adelson 8 United States USA 38.0 self-made self-made finance male 80.0 ... founder 1952.0 NaN casinos NaN NaN http://www.forbes.com/profile/sheldon-adelson/ http://lasvegassun.com/news/1996/nov/26/rat-pa... NaN NaN
429 2014 Christy Walton 9 United States USA 36.7 inherited inherited female 59.0 ... relation 1962.0 NaN Wal-Mart widow NaN http://www.forbes.com/profile/christy-walton/ NaN NaN NaN
1128 2014 Jim Walton 10 United States USA 34.7 inherited inherited male 66.0 ... relation 1962.0 NaN Wal-Mart inherited from father NaN http://www.forbes.com/profile/jim-walton/ NaN NaN NaN

10 rows × 30 columns

What's the average wealth of a billionaire? Male? Female?


In [15]:
# Summary statistics of 2014 net worth (in billions of USD).
# The mean shows the average wealth of a billionaire is about $3.9 billion.
net_worth = recent['networthusbillion']
net_worth.describe()


Out[15]:
count    1653.000000
mean        3.904658
std         5.748520
min         1.000000
25%         1.400000
50%         2.100000
75%         3.700000
max        76.000000
Name: networthusbillion, dtype: float64

In [17]:
# groupby() splits the rows by gender; describe() then summarises net worth
# within each group.
# Female mean: 3.920556 billion
# Male mean:   3.902716 billion
net_worth_by_gender = recent.groupby('gender')['networthusbillion']
net_worth_by_gender.describe()


Out[17]:
gender       
female  count     180.000000
        mean        3.920556
        std         5.312604
        min         1.000000
        25%         1.400000
        50%         2.300000
        75%         3.700000
        max        36.700000
male    count    1473.000000
        mean        3.902716
        std         5.801227
        min         1.000000
        25%         1.400000
        50%         2.100000
        75%         3.700000
        max        76.000000
Name: networthusbillion, dtype: float64

Who is the poorest billionaire? Who are the top 10 poorest billionaires?


In [12]:
# The largest rank numbers belong to the smallest fortunes, so sorting by rank
# in descending order lists the poorest billionaires first.
lowest_ranked = recent.sort_values(by='rank', ascending=False)
lowest_ranked.head(10)


Out[12]:
year name rank citizenship countrycode networthusbillion selfmade typeofwealth gender age ... relationshiptocompany foundingdate gdpcurrentus sourceofwealth notes notes2 source source_2 source_3 source_4
1971 2014 Richard Chang 1565 Taiwan Taiwan 1.0 self-made self-made finance male 67.0 ... founder 1984.0 NaN real estate, electronics NaN NaN http://www.forbes.com/profile/richard-chang-1/ http://www.aseglobal.com/en/About/Milestones.asp NaN NaN
1834 2014 Pavel Tykac 1565 Czech Republic CZE 1.0 self-made privatized and resources male 49.0 ... investor 1995.0 NaN coal mine took advantage of Czech Republics privitizatio... NaN http://www.forbes.com/profile/pavel-tykac/ http://cs.wikipedia.org/wiki/Pavel_Tyka%C4%8D NaN NaN
2443 2014 Vivek Chaand Sehgal 1565 Australia AUS 1.0 self-made founder non-finance male 57.0 ... founder 1986.0 NaN auto parts Indian founder of Motherson Sumi, Australian c... NaN http://www.forbes.com/profile/vivek-chaand-seh... http://en.wikipedia.org/wiki/Motherson_Sumi_Sy... NaN Will Vivek Sehgal’s Gambit Pay Off? Rashmi K P...
1656 2014 Michael Kors 1565 United States USA 1.0 self-made founder non-finance male 54.0 ... founder 1981.0 NaN Retail NaN NaN http://en.wikipedia.org/wiki/Michael_Kors http://www.forbes.com/sites/briansolomon/2014/... NaN NaN
1089 2014 Jeanine Dick 1565 France FRA 1.0 inherited inherited female 77.0 ... relation 1968.0 NaN animal pharmaceuticals widow NaN http://www.forbes.com/profile/jeanine-dick/ NaN NaN NaN
343 2014 Carlos Martins 1565 Brazil BRA 1.0 self-made founder non-finance male 57.0 ... founder 1987.0 NaN education NaN NaN http://en.wikipedia.org/wiki/Carlos_Roberto_Ma... http://www.mh1.com.br/english/courses.asp NaN NaN
559 2014 Ding Shijia 1565 China CHN 1.0 self-made executive male 50.0 ... deputy chairman 1994.0 NaN shoes NaN NaN http://en.wikipedia.org/wiki/Anta_Sports http://en.wikipedia.org/wiki/Ding_Shijia http://www.bloomberg.com/research/stocks/peopl... NaN
560 2014 Ding Shizhong 1565 China CHN 1.0 self-made executive male 43.0 ... director 1966.0 NaN retail NaN NaN http://www.sce-re.com/en/about.asp http://www.forbes.com/profile/ding-shizhong/ NaN NaN
569 2014 Dmitry Korzhev 1565 Russia RUS 1.0 self-made founder non-finance male 50.0 ... founder 1993.0 NaN retail NaN NaN http://www.forbes.com/profile/dmitry-korzhev/ http://www.bloomberg.com/profiles/people/16388... NaN NaN
2168 2014 Sergei Tsikalyuk 1565 Russia RUS 1.0 self-made self-made finance male 55.0 ... founder 1992.0 NaN insurance NaN NaN http://www.forbes.com/profile/sergei-tsikalyuk/ http://www.bloomberg.com/research/stocks/priva... NaN NaN

10 rows × 30 columns

What is "relationship to company"? And what are the most common relationships?


In [19]:
# Show each 2014 billionaire's relationship to their company.
relationships = recent['relationshiptocompany']
relationships


Out[19]:
1       former chairman and CEO
5                      relation
6                      relation
8                       founder
9                       founder
12                     relation
15                     relation
17                      founder
18                     relation
23                     relation
25                     relation
26                      founder
27                     relation
28                        owner
29                      founder
30                      founder
34                      founder
37                     relation
38                      founder
39                     relation
40                      founder
43                     relation
45                      founder
46                      founder
47                     relation
48                     relation
50                      founder
51                        owner
54                     relation
55                      founder
                 ...           
2584                    founder
2585                    founder
2586                    founder
2587                   relation
2588                   relation
2589                      owner
2590                    founder
2591                    founder
2592                      owner
2593                   chairman
2594                   chairman
2595                   chairman
2596          founder/president
2597                   chairman
2598                    founder
2599                    founder
2600                    founder
2601                        CEO
2602                      owner
2603                    founder
2604                    founder
2605                    founder
2606                    founder
2607                   chairman
2608                   investor
2609                    founder
2610                    founder
2611                    founder
2612                    founder
2613                    founder
Name: relationshiptocompany, dtype: object

In [20]:
# describe() on a text column reports the most frequent value as `top`.
# The most common relationship to company is "founder".
relationship_column = recent['relationshiptocompany']
relationship_column.describe()


Out[20]:
count        1651
unique         72
top       founder
freq          818
Name: relationshiptocompany, dtype: object

Most common source of wealth? Male vs. female?


In [21]:
# `top` in the summary is the most frequent value:
# the most common source of wealth is real estate.
wealth_sources = recent['sourceofwealth']
wealth_sources.describe()


Out[21]:
count            1636
unique            616
top       real estate
freq              107
Name: sourceofwealth, dtype: object

In [26]:
# Summarise the source-of-wealth column separately for each gender.
# The most common source for men is real estate; for women it is "diversified".
sources_by_gender = recent.groupby('gender')['sourceofwealth']
sources_by_gender.describe()


Out[26]:
gender        
female  count             172
        unique            100
        top       diversified
        freq                9
male    count            1464
        unique            578
        top       real estate
        freq              100
Name: sourceofwealth, dtype: object

Given the richest person in a country, what % of the GDP is their wealth?


In [35]:
# Look up the GDP value recorded for each of the ten richest people of 2014.
top_ten = recent.sort_values(by='networthusbillion', ascending=False).head(10)
top_ten['gdpcurrentus']


Out[35]:
284    NaN
348    NaN
124    NaN
2491   NaN
1377   NaN
509    NaN
381    NaN
2185   NaN
429    NaN
1128   NaN
Name: gdpcurrentus, dtype: float64

In [30]:
# Share of US GDP represented by the richest American billionaire's fortune.
# GDP: $17,348 billion for the USA in 2014, taken from an external website.
# Net worth: $76 billion, read from the sorted table shown earlier.
usa_gdp = 17348
richest = 76
gdp_fraction = richest / usa_gdp
percent = round(gdp_fraction * 100, 2)
print(percent, "% of the US GDP is his wealth.")


0.44 % of the US GDP is his wealth.

Add up the wealth of all of the billionaires in a given country (or a few countries) and then compare it to the GDP of the country, or to the billionaire wealth of other countries; for example, pit the US against India.


In [36]:
# Total billionaire net worth per country (billions of USD), largest first.
# The USA totals about $2,322 billion, compared with about $422 billion for Russia.
worth_by_country = recent.groupby('countrycode')['networthusbillion'].sum()
worth_by_country.sort_values(ascending=False)


Out[36]:
countrycode
USA       2322.4
RUS        422.5
DEU        401.4
CHN        375.8
FRA        235.3
HKG        213.7
BRA        192.2
IND        191.9
ITA        158.1
GBR        152.0
MEX        142.9
ESP        122.6
SWE        116.7
CAN        112.8
JPN        101.0
AUS         85.4
CHE         80.2
Taiwan      75.8
KOR         60.7
MYS         53.1
ISR         51.8
SAU         49.0
IDN         47.8
SGP         45.1
TUR         43.2
CHL         41.3
PHL         40.1
THA         36.8
AUT         33.8
NGA         33.3
           ...  
EGY         15.6
ARE         14.6
FIN         13.3
POL         12.8
LBN         12.3
PER         11.9
ARG         11.3
PRT         10.6
NZL          9.8
KAZ          9.2
VEN          9.0
GRC          8.2
BEL          8.0
MAR          7.4
KWT          6.5
GEO          5.2
MCO          4.6
SWZ          3.7
AGO          3.7
DZA          3.2
MAC          2.8
GGY          2.4
OMN          2.3
VNM          1.6
ROU          1.2
KNA          1.2
NPL          1.1
UGA          1.1
TZA          1.0
LTU          1.0
Name: networthusbillion, dtype: float64

What are the most common industries for billionaires to come from? What's the total amount of billionaire money from each industry?


In [37]:
# Summarise the source-of-wealth column; `top` is its most frequent value.
# NOTE(review): the frame also has an 'industry' column; 'sourceofwealth' is
# used here to answer the industry question — confirm that is intended.
source_column = recent['sourceofwealth']
source_column.describe()


Out[37]:
count            1636
unique            616
top       real estate
freq              107
Name: sourceofwealth, dtype: object

In [39]:
# Total net worth (billions of USD) attributed to each source of wealth,
# ordered from largest to smallest.
worth_by_source = recent.groupby('sourceofwealth')['networthusbillion'].sum()
worth_by_source.sort_values(ascending=False)


Out[39]:
sourceofwealth
diversified                        373.9
real estate                        321.2
retail                             310.4
investments                        234.4
media                              153.1
hedge funds                        152.1
Wal-Mart                           148.8
telecom                            115.8
pharmaceuticals                    115.6
banking                            106.7
Microsoft                           96.6
casinos                             85.2
mining                              82.2
Google                              79.7
construction                        77.5
candy                               70.9
software                            67.3
beer                                61.3
Berkshire Hathaway                  60.4
private equity                      56.4
Facebook                            50.3
money management                    48.5
Oracle                              48.0
steel                               46.9
oil, banking, telecom               43.8
oil & gas                           43.3
pipelines                           40.7
consumer goods                      40.7
H&M                                 39.2
beverages                           37.0
                                   ...  
dental implants                      1.1
Oil                                  1.1
Steel                                1.1
Xerox                                1.1
agriculture, refinery                1.1
Mirae                                1.1
communications                       1.1
banking, oil                         1.1
hydraulic lift machinery             1.1
energy, media, banking               1.1
finance, agriculture                 1.1
animal pharmaceuticals               1.0
Movies                               1.0
jewelry                              1.0
Spanx                                1.0
furniture retail                     1.0
telecom, investments                 1.0
food sweeteners                      1.0
retail, pharmacy                     1.0
Cisco                                1.0
electric linear systems              1.0
Celltrion                            1.0
real estate, electronics             1.0
auto import and dealerships          1.0
power, power engineering             1.0
banking, agriculture                 1.0
coal mine                            1.0
logistics                            1.0
copper processing & real estate      1.0
appliance retailer                   1.0
Name: networthusbillion, dtype: float64

In [ ]:
# TODO: follow-up questions to explore. They are kept as comments because this
# is a code cell, and plain text here would raise a SyntaxError when run.
# How old are billionaires? How old are billionaires self made vs. non self made? or different industries?
# Who are the youngest billionaires? The oldest? Age distribution - maybe make a graph about it?
# Maybe just make a graph about how wealthy they are in general?
# Maybe plot their net worth vs age (scatterplot)
# Make a bar graph of the top 10 or 20 richest

How many self made billionaires vs. others?


In [41]:
# Count how many 2014 billionaires fall under each `selfmade` label.
selfmade_labels = recent['selfmade']
selfmade_labels.value_counts()


Out[41]:
self-made    1146
inherited     505
Name: selfmade, dtype: int64

How old are billionaires? How old are billionaires self made vs. non self made? or different industries?


In [45]:
# Sort by age in descending order so the oldest billionaires come first.
oldest_first = recent.sort_values(by='age', ascending=False)
oldest_first.head()


Out[45]:
year name rank citizenship countrycode networthusbillion selfmade typeofwealth gender age ... relationshiptocompany foundingdate gdpcurrentus sourceofwealth notes notes2 source source_2 source_3 source_4
516 2014 David Rockefeller, Sr. 580 United States USA 2.9 inherited inherited male 98.0 ... relation 1870.0 NaN oil, banking family made most of fortune in the late 19th a... NaN http://en.wikipedia.org/wiki/David_Rockefeller http://en.wikipedia.org/wiki/Standard_Oil http://en.wikipedia.org/wiki/Rockefeller_family NaN
1277 2014 Karl Wlaschek 305 Austria AUT 4.8 self-made founder non-finance male 96.0 ... founder 1953.0 NaN retail NaN NaN http://en.wikipedia.org/wiki/BILLA http://en.wikipedia.org/wiki/Karl_Wlaschek https://www.billa.at/Footer_Nav_Seiten/Geschic... NaN
1328 2014 Kirk Kerkorian 328 United States USA 4.5 self-made self-made finance male 96.0 ... investor 1924.0 NaN casinos, investments purchased in 1969 NaN http://en.wikipedia.org/wiki/Kirk_Kerkorian http://www.forbes.com/profile/kirk-kerkorian/ PROFILE: Las Vegas billionaire amassed his wea... NaN
921 2014 Henry Hillman 687 United States USA 2.5 inherited inherited male 95.0 ... relation 1942.0 NaN investments inherited from father NaN http://www.forbes.com/profile/henry-hillman/ http://en.wikipedia.org/wiki/Calgon_Carbon NaN NaN
666 2014 Erika Pohl-Stroher 1154 Germany DEU 1.5 inherited inherited female 95.0 ... relation 1880.0 NaN hair products 3rd generation 23% stake in the company http://www.forbes.com/profile/erika-pohl-stroher/ http://en.wikipedia.org/wiki/Wella NaN NaN

5 rows × 30 columns


In [50]:
# Passing a list of column names (double brackets) selects a DataFrame rather
# than a single column.
wanted_columns = ['name', 'age', 'selfmade', 'industry']
columns_want = recent[wanted_columns]
columns_want.head()


Out[50]:
name age selfmade industry
1 A. Jerrold Perenchio 83.0 self-made Media
5 Abdulla Al Futtaim NaN inherited Retail, Restaurant
6 Abdulla bin Ahmad Al Ghurair NaN inherited Diversified financial
8 Abdullah Al Rajhi NaN self-made Money Management
9 Abdulsamad Rabiu 54.0 self-made Consumer

In [ ]: