In [ ]:
!pip3 install matplotlib
In [62]:
import pandas as pd
In [63]:
!pip3 install xlrd
In [64]:
# Load the workbook (path is relative to the notebook's working directory) into
# `df`, the DataFrame that the following cells read from.
df = pd.read_excel("richpeople.xlsx")
In [65]:
# Peek at the first three rows to check the load worked.
df.head(n=3)
Out[65]:
In [66]:
# List the column labels of the loaded DataFrame.
df.columns
Out[66]:
In [67]:
# Keep only the rows whose 'year' equals 2014; later cells work from this subset.
recent = df.loc[df['year'].eq(2014)]
recent.head(n=5)
Out[67]:
In [68]:
# Five most frequent 'citizenship' values. Counted on `recent` (the year-2014 rows)
# like the rest of the analysis: the unfiltered `df` also carries rows from other
# years (see the year filter that builds `recent`), which would inflate the tally.
recent['citizenship'].value_counts().head(5)
# The second part of the question is skipped: it needs the number of people per
# country, which would mean adding a new column (joining in another table might be easier).
Out[68]:
In [69]:
# First ten rows of the 2014 subset when ordered by ascending 'rank'.
recent.sort_values('rank').head(n=10)
Out[69]:
In [70]:
# Summary statistics of the 'networthusbillion' column for the 2014 subset.
recent.loc[:, 'networthusbillion'].describe()
Out[70]:
In [71]:
# Split the 2014 subset on the 'gender' column; both frames are reused by later cells.
females = recent.loc[recent['gender'].eq('female')]
males = recent.loc[recent['gender'].eq('male')]
# Summary statistics of 'networthusbillion' for the female group.
females.loc[:, 'networthusbillion'].describe()
Out[71]:
In [72]:
# Summary statistics of 'networthusbillion' for the male group.
males.loc[:, 'networthusbillion'].describe()
Out[72]:
In [73]:
# Last row of the 2014 subset when ordered by ascending 'rank'.
recent.sort_values('rank').tail(n=1)
Out[73]:
In [74]:
# Last ten rows of the 2014 subset when ordered by ascending 'rank'.
recent.sort_values('rank').tail(n=10)
Out[74]:
In [75]:
# Ten most frequent 'relationshiptocompany' values in the 2014 subset.
recent.loc[:, 'relationshiptocompany'].value_counts().head(n=10)
Out[75]:
In [76]:
# Ten most frequent 'sourceofwealth' values in the 2014 subset.
recent.loc[:, 'sourceofwealth'].value_counts().head(n=10)
Out[76]:
In [118]:
# `females` and `males` are already derived from `recent` in the earlier cell that
# summarises net worth by gender, so they are reused here rather than rebuilt with
# a copy of the same filter.
# Ten most frequent 'sourceofwealth' values among the female group.
females['sourceofwealth'].value_counts().head(10)
Out[118]:
In [119]:
# Ten most frequent 'sourceofwealth' values among the male group.
males.loc[:, 'sourceofwealth'].value_counts().head(n=10)
Out[119]:
In [77]:
# Ten most frequent 'industry' values in the 2014 subset.
recent.loc[:, 'industry'].value_counts().head(n=10)
Out[77]:
In [78]:
# Total 'networthusbillion' per 'industry' value in the 2014 subset.
recent['networthusbillion'].groupby(recent['industry']).sum()
Out[78]:
In [79]:
# How often each 'selfmade' value occurs in the 2014 subset.
recent.loc[:, 'selfmade'].value_counts()
Out[79]:
In [80]:
# Column labels for a name/age view of the 2014 subset.
billionaires_age = ['name', 'age']
# Select just those two columns (every row is kept).
recent.loc[:, billionaires_age]
Out[80]:
In [81]:
# Summary statistics of 'age' for each 'selfmade' value in the 2014 subset.
recent['age'].groupby(recent['selfmade']).describe()
Out[81]:
In [82]:
# Summary statistics of 'age' for each 'industry' value in the 2014 subset.
recent['age'].groupby(recent['industry']).describe()
Out[82]:
In [83]:
# Ten rows with the lowest 'age' in the 2014 subset (ascending sort is the default).
recent.sort_values(by='age').head(n=10)
Out[83]:
In [84]:
# Ten rows with the highest 'age'. Uses `recent` (the year-2014 rows), matching the
# youngest-ten cell just before it and the later `old_age_ordered` plot; the
# unfiltered `df` would mix in rows from other years.
recent.sort_values('age', ascending=False).head(10)
Out[84]:
In [85]:
import matplotlib.pyplot as plt
In [86]:
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline
In [47]:
# pyplot is imported again here; an earlier cell already imports it as `plt`.
import matplotlib.pyplot as plt
# Show the style-sheet names that plt.style.use() accepts.
plt.style.available
Out[47]:
In [68]:
# Switch the plotting style; this applies to the figures drawn from here on.
plt.style.use('dark_background')
# Ten lowest-'age' rows of the 2014 subset, scattered as age vs. net worth.
young_age_ordered = recent.sort_values(by='age').head(n=10)
young_age_ordered.plot.scatter(x='age', y='networthusbillion')
# Note: this chart came from misreading the exercise instructions.
Out[68]:
In [69]:
# Ten highest-'age' rows of the 2014 subset, scattered as age vs. net worth.
old_age_ordered = recent.sort_values(by='age', ascending=False).head(n=10)
old_age_ordered.plot.scatter(x='age', y='networthusbillion')
# Note: this chart came from misreading the exercise instructions.
Out[69]:
In [70]:
# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and later
# removed the old names, so prefer the new name and fall back to the old one only
# on installations that do not list it.
bright_style = (
    'seaborn-v0_8-bright'
    if 'seaborn-v0_8-bright' in plt.style.available
    else 'seaborn-bright'
)
plt.style.use(bright_style)

# Number of rows in the 2014 subset for each distinct 'age', most common first.
age_distribution = recent['age'].value_counts()
# Only a cell's last expression is echoed, so print the summary explicitly
# instead of letting it be silently discarded.
print(age_distribution.describe())
# A Series plot uses the index (the ages) for x and the values (the counts) for y,
# so no x/y arguments are needed.
age_distribution.head(30).plot(kind='bar')
Out[70]:
In [72]:
# One bar per row of the 2014 subset: far too dense to read, kept only because it looks striking.
recent.plot.bar(x='name', y='networthusbillion')
Out[72]:
In [65]:
# Order the 2014 subset from highest to lowest 'networthusbillion'.
ordered_by_wealth = recent.sort_values(by='networthusbillion', ascending=False)
# Green bar chart of the first thirty rows, labelled on x by their 'rank'.
ordered_by_wealth.head(n=30).plot.bar(x='rank', y='networthusbillion', color=['g'])
Out[65]:
In [66]:
# Scatter 'age' against 'networthusbillion' for every row of the 2014 subset.
recent.plot.scatter(x='age', y='networthusbillion')
Out[66]:
In [67]:
# Ten rows of the 2014 subset with the highest 'networthusbillion'.
top_10 = recent.sort_values('networthusbillion', ascending=False).head(n=10)
# Horizontal red bars, one per row, labelled by 'name'.
top_10.plot.barh(x='name', y='networthusbillion', color="r")
Out[67]:
In [ ]:
In [ ]: