In [101]:
import pandas as pd
In [102]:
# Load the workbook into the frame that every later cell reads from.
df = pd.read_excel(io="richpeople.xlsx")
In [103]:
# Preview the first five rows.
df.head(n=5)
Out[103]:
In [104]:
# Show the column labels of the loaded frame.
df.columns
Out[104]:
In [105]:
# Same column labels, displayed as a plain array.
column_index = df.columns
column_index.values
Out[105]:
In [106]:
# Keep only the rows whose year is 2014; all later cells work on this subset.
df = df.loc[df['year'].eq(2014)]
In [107]:
# Tally rows per country code; value_counts() orders the tally from most to least frequent.
country_tally = df['countrycode'].value_counts()
print("Most billionaires are from the following countries in descending order:")
country_tally.head(n=5)
Out[107]:
In [108]:
# Divide each hard-coded figure by one billion.
# NOTE(review): the figures look transcribed from the country tally shown earlier — confirm they are still current.
PER_BILLION = 1000000000
us, ger, china, russia, japan = [count / PER_BILLION for count in (903, 160, 153, 119, 96)]
print("per billion for us is", us, "for germany is", ger, "for china is", china, "for russia is", russia, "for japan is", japan)
In [109]:
# Summary statistics for the net-worth column.
df.loc[:, 'networthusbillion'].describe()
Out[109]:
In [110]:
# Compute the mean from the frame instead of typing in a number copied from
# an earlier output, so the message cannot drift from the data.
print("Average wealth of a billionaire is {:.6f}".format(df['networthusbillion'].mean()))
In [111]:
# Subset of rows whose gender field is exactly "male".
male = df.loc[df['gender'].eq("male")]
In [112]:
# Preview the first five rows of the male subset.
male.head(n=5)
Out[112]:
In [113]:
# Summary statistics of net worth for the male subset.
male.loc[:, 'networthusbillion'].describe()
Out[113]:
In [114]:
# Compute the mean from the male subset rather than hard-coding a figure
# copied from an earlier output.
print("The average wealth of male billionaires is {:.6f}".format(male['networthusbillion'].mean()))
In [115]:
# Subset of rows whose gender field is exactly "female".
female = df.loc[df['gender'].eq("female")]
In [116]:
# Summary statistics of net worth for the female subset.
female.loc[:, 'networthusbillion'].describe()
Out[116]:
In [117]:
# Compute the mean from the female subset rather than hard-coding a figure
# copied from an earlier output.
print("The average wealth of female billionaires is {:.6f}".format(female['networthusbillion'].mean()))
In [118]:
# Five most frequent values of the source-of-wealth column across all rows.
wealth_source_tally = df['sourceofwealth'].value_counts()
print("Most common source of wealth are:")
wealth_source_tally.head(n=5)
Out[118]:
In [119]:
# Same tally, restricted to the male subset.
print("Most common source of wealth for male billionaires are:")
male.loc[:, 'sourceofwealth'].value_counts().head(n=5)
Out[119]:
In [120]:
# Same tally, restricted to the female subset.
print("Most common source of wealth for female billionaires are:")
female.loc[:, 'sourceofwealth'].value_counts().head(n=5)
Out[120]:
In [121]:
# Ten rows with the largest net worth, highest first.
bill = df.sort_values(by='networthusbillion', ascending=False).iloc[:10]
In [122]:
# Display the ten highest-net-worth rows with every column.
df.sort_values(by='networthusbillion', ascending=False).head(n=10)
Out[122]:
In [123]:
# Narrow the top-ten frame to the three columns the message refers to.
columns_want = bill.loc[:, ['name', 'rank', 'networthusbillion']]
print("A precise list of billionaires, wealth and rank is given below:")
columns_want
Out[123]:
In [124]:
# NOTE(review): both figures are hard-coded; their source, year and units are not shown here — confirm.
us_gdp, wealth_rich = 7419, 76
# Express the wealth figure as a whole-number percentage of the GDP figure.
percent = round(100 * wealth_rich / us_gdp)
print(percent, "% of the US GDP is their wealth")
In [125]:
# Frequency of each industry label, most common first.
print("the most common industries for billionaires to come from are:")
df.loc[:, 'industry'].value_counts()
Out[125]:
In [126]:
# Three-column view (name, net worth, industry) reused by the per-industry summary.
columns_we_want = df.loc[:, ['name', 'networthusbillion', 'industry']]
columns_we_want
Out[126]:
In [127]:
# Show the summed net worth per industry so the table matches the message;
# describe() only reports count/mean/spread statistics, never a total.
print("the total amount of billionaire money from each industry are given below:")
columns_we_want.groupby('industry')['networthusbillion'].sum()
Out[127]:
In [128]:
#columnswant = df[['name', 'networthusbillion', 'selfmade']]
#columnswant
In [129]:
# Count how often each value of the selfmade column occurs.
selfmade_tally = df['selfmade'].value_counts()
print("The number of selfmade billionaires are:")
selfmade_tally
Out[129]:
In [130]:
# Name, age and selfmade columns; show the first ten rows.
columns_want = df.loc[:, ['name', 'age', 'selfmade']]
columns_want.head(n=10)
Out[130]:
In [131]:
# Name, age and industry columns; show the first ten rows.
columns_want = df.loc[:, ['name', 'age', 'industry']]
columns_want.head(n=10)
Out[131]:
In [132]:
# Order the current columns_want view from oldest to youngest.
columns_want.sort_values(by='age', ascending=False)
Out[132]:
In [133]:
# Rows whose country code is exactly "USA".
is_in_us = df.loc[df['countrycode'].eq("USA")]
In [134]:
# Summary statistics of net worth for the USA subset.
is_in_us.loc[:, 'networthusbillion'].describe()
Out[134]:
In [135]:
# describe() reports count/mean/std/min/quartiles/max but no total, so the
# figure has to come from an explicit sum over the USA subset.
# Rounded to one decimal to keep float noise out of the message.
print("The total wealth of billionaires in US is", round(is_in_us['networthusbillion'].sum(), 1))
Compare the total wealth of US billionaires with a country's GDP: for example, set US billionaire wealth against the GDP of India
In [136]:
# NOTE(review): hard-coded GDP figure; presumably in billions of USD to match
# the net-worth column — confirm source and year.
gdp_india = 2066.90
# Sum the net worth of the USA rows instead of hard-coding a number: the
# previous literal was not a total (describe() never reports one).
us_bill_wealth = df.loc[df['countrycode'] == "USA", 'networthusbillion'].sum()
percent = round((us_bill_wealth * 100) / gdp_india)
print(percent, "% of the India GDP is the wealth of US billionaires")
In [137]:
# Ten rows with the smallest net worth (ascending sort is the default).
df.sort_values(by='networthusbillion').head(n=10)
Out[137]:
In [138]:
# First row after an ascending sort on net worth.
print("The poorest billionaire is")
df.sort_values(by='networthusbillion').head(n=1)
Out[138]:
In [139]:
# Ascending sort on age, first ten rows.
print("The ten youngest billionaires are: ")
df.sort_values(by='age').head(n=10)
Out[139]:
In [140]:
# Descending sort on age, first ten rows.
print("The ten oldest billionaires are: ")
df.sort_values(by='age', ascending=False).head(n=10)
Out[140]:
In [141]:
# Ten oldest entries, limited to name, age and industry.
columns_want = df.loc[:, ['name', 'age', 'industry']]
columns_want.sort_values(by='age', ascending=False).head(n=10)
Out[141]:
In [142]:
import matplotlib.pyplot as plt
In [143]:
%matplotlib inline
In [144]:
# Scatter of net worth against age, one point per row.
df.plot.scatter(x='age', y='networthusbillion')
Out[144]:
In [145]:
# Five most frequent values of the relationship-to-company column.
relationship_tally = df['relationshiptocompany'].value_counts()
print("The most common relationships are:")
relationship_tally.head(n=5)
Out[145]:
In [146]:
# Explanatory note for the relationship tally; sentence corrected from the
# ungrammatical "is describes".
print("Relationship to a company describes the billionaire's relationship to the company primarily responsible for their wealth, such as founder, executive, relation, or shareholder")
In [151]:
# `columnswant` was only ever assigned in a commented-out cell, so on a fresh
# kernel this cell raised NameError. Define it here using the column list
# from that commented-out assignment, then display it.
columnswant = df[['name', 'networthusbillion', 'selfmade']]
columnswant
Out[151]:
In [158]:
# Copy of the frame ordered by ascending net worth, used by the line plot.
sort_df = df.sort_values(by='networthusbillion')
In [159]:
# Line plot of net worth against rank over the sorted frame.
sort_df.plot.line(x='rank', y='networthusbillion')
Out[159]:
In [161]:
# One bar per row, labelled by name.
# NOTE(review): this draws a bar for every row in df — confirm that is intended.
df.plot.bar(x='name', y='networthusbillion')
Out[161]:
In [162]:
# Scatter of net worth against age (same plot as drawn earlier in the notebook).
df.plot.scatter(x='age', y='networthusbillion')
Out[162]:
In [163]:
# Plot from the DataFrame rather than from a single column: x/y choose
# columns of a DataFrame, and a Series has no 'name' column to label the
# bars with. Still uses the first ten rows, as before.
df.head(10).plot(kind='bar', x='name', y='networthusbillion')
Out[163]:
In [ ]: