In [1]:
import pandas as pd
In [34]:
import matplotlib as plt
import matplotlib.pyplot as plt
% matplotlib inline
In [3]:
# Load the CSV (path is relative to the notebook's working directory) into `df`.
df = pd.read_csv(filepath_or_buffer="07-hw-animals.csv")
In [4]:
# Display the whole DataFrame read from 07-hw-animals.csv.
df
Out[4]:
In [5]:
# Column labels of `df`, shown as a plain array.
column_index = df.columns
column_index.values
Out[5]:
In [6]:
# First three rows of `df`.
df.head(n=3)
Out[6]:
In [7]:
# Order rows by 'length', largest first, then keep the top three.
by_length_desc = df.sort_values(by='length', ascending=False)
by_length_desc.head(n=3)
Out[7]:
In [8]:
# Number of rows for each distinct value in the 'animal' column.
animal_column = df['animal']
animal_column.value_counts()
Out[8]:
In [9]:
# Boolean Series: True on rows whose 'animal' value is 'dog'.
df['animal'].eq('dog')
Out[9]:
In [10]:
# Keep only the rows whose 'animal' value is 'dog'.
is_dog = df['animal'].eq('dog')
df[is_dog]
Out[10]:
In [11]:
# Rows whose 'length' is strictly greater than 40.
df[df['length'].gt(40)]
Out[11]:
In [12]:
# Multiplier used to derive 'length_inches' from 'length'.
# NOTE(review): 0.393701 is the number of inches in one centimetre, so despite
# its name this is an "inches per cm" factor; presumably 'length' is in cm — confirm.
cm_in_inch = 0.393701
length_values = df['length']
# Adds (or overwrites) the 'length_inches' column on `df` in place.
df['length_inches'] = length_values.mul(cm_in_inch)
df
Out[12]:
In [37]:
# Subset of `df` containing only the rows where 'animal' is 'cat'.
cat_rows = df['animal'].eq('cat')
cats = df[cat_rows]
cats
Out[37]:
In [38]:
# Subset of `df` containing only the rows where 'animal' is 'dog'.
dog_rows = df['animal'].eq('dog')
dogs = df[dog_rows]
dogs
Out[38]:
In [44]:
# Cats whose 'length_inches' is strictly greater than 12.
cats[cats['length_inches'].gt(12)]
Out[44]:
In [16]:
# Same selection made directly on the full DataFrame: both conditions are
# built as boolean Series and combined element-wise with `&`.
is_cat = df['animal'].eq('cat')
over_12_inches = df['length_inches'].gt(12)
df[is_cat & over_12_inches]
Out[16]:
In [42]:
# Mean of the cats' 'length' column. Calling .mean() directly avoids building
# the full describe() summary just to read one statistic from it.
cats['length'].mean()
Out[42]:
In [43]:
# Mean of the dogs' 'length' column. Calling .mean() directly avoids building
# the full describe() summary just to read one statistic from it.
dogs['length'].mean()
Out[43]:
In [19]:
# Group the rows by 'animal' and report the mean 'length' of each group.
animals = df.groupby(by=['animal'])
length_per_group = animals['length']
length_per_group.mean()
Out[19]:
In [53]:
# Histogram of the dogs' 'length' values.
dog_lengths = dogs['length']
dog_lengths.hist()
Out[53]:
In [54]:
# Switch matplotlib to the 'ggplot' style sheet, then redraw the same
# histogram (bins=10 is the default bin count, spelled out explicitly).
plt.style.use('ggplot')
dogs['length'].hist(bins=10)
Out[54]:
In [55]:
# Horizontal bar chart: one bar per row, labelled by 'name', sized by 'length'.
df.plot.barh(x='name', y='length')
Out[55]:
In [56]:
# Sort the cats by ascending 'length' first so the bars appear in length order.
cats_by_length = cats.sort_values(by='length')
cats_by_length.plot.barh(x='name', y='length')
Out[56]:
Answer a selection of your own choosing from the following questions, or any other questions you can think of.
In [105]:
import pandas as pd
import matplotlib.pyplot as plt
% matplotlib inline
df = pd.read_csv('richpeople.csv', encoding='latin-1')
In [106]:
# Keep only the rows whose 'year' is 2014; the cells below analyse this subset.
# (A bare `df.head(10)` ahead of this assignment would not be displayed, because
# a notebook cell only shows its last expression, so no preview is computed here.)
richpeople = df[df['year'] == 2014]
# Column labels available for the analysis.
richpeople.columns
Out[106]:
In [107]:
# Ten rows with the highest 'networthusbillion'.
richest_first = richpeople.sort_values(by='networthusbillion', ascending=False)
richest_first.head(n=10)
Out[107]:
In [108]:
# Ten rows with the lowest 'networthusbillion' (ascending sort is the default,
# written out here for symmetry with the descending case).
poorest_first = richpeople.sort_values(by='networthusbillion', ascending=True)
poorest_first.head(n=10)
Out[108]:
In [109]:
# Mean of 'networthusbillion' over the 2014 subset, printed with a label.
mean_networth = richpeople['networthusbillion'].mean()
print("The average networth of billionaires in US billion is", mean_networth)
In [110]:
# Mean 'networthusbillion' within each 'gender' group.
networth_by_gender = richpeople.groupby(by='gender')['networthusbillion']
networth_by_gender.mean()
Out[110]:
In [111]:
# Number of rows per distinct 'citizenship' value, most frequent first.
citizenship_column = richpeople['citizenship']
citizenship_column.value_counts()
Out[111]:
In [112]:
# Number of rows per distinct 'industry' value, most frequent first.
industry_column = richpeople['industry']
industry_column.value_counts()
Out[112]:
In [113]:
# Mean of the 'age' column over the 2014 subset, printed inside a sentence.
mean_age = richpeople['age'].mean()
print("On average billionaires are", mean_age, "years old.")
In [114]:
# Rows whose 'selfmade' value is exactly 'self-made', and their mean 'age'.
is_selfmade = richpeople['selfmade'].eq('self-made')
selfmade = richpeople[is_selfmade]
selfmade_mean_age = selfmade['age'].mean()
print("Selfmade billionaires are about", selfmade_mean_age, "years old.")
In [115]:
# Every row whose 'selfmade' value is anything other than 'self-made'.
# NOTE(review): rows with a missing 'selfmade' value also pass this test —
# confirm that is intended.
not_selfmade = richpeople['selfmade'].ne('self-made')
non_selfmade = richpeople[not_selfmade]
non_selfmade_mean_age = non_selfmade['age'].mean()
print("Non-selfmade billionaires are on average", non_selfmade_mean_age, "years old.")
In [116]:
# Three rows with the smallest 'age' (sort_values is ascending by default).
youngest_first = richpeople.sort_values(by='age')
youngest_first.head(n=3)
Out[116]:
In [117]:
# Three rows with the largest 'age'.
oldest_first = richpeople.sort_values(by='age', ascending=False)
oldest_first.head(n=3)
Out[117]:
In [119]:
# Apply the 'ggplot' style sheet and draw a histogram of the 'age' column.
plt.style.use('ggplot')
ages = richpeople['age']
ages.hist()
Out[119]:
In [127]:
# Scatter plot of 'networthusbillion' against 'age' on a 10x10 figure;
# alpha=0.3 makes the markers translucent so overlapping points stay visible.
richpeople.plot.scatter(
    x='age',
    y='networthusbillion',
    figsize=(10, 10),
    alpha=0.3,
)
Out[127]:
In [ ]: