In [28]:
import numpy as np
import pandas as pd
# Default figure size for the plots in this notebook.
# NOTE(review): `figsize` is not imported in this cell; presumably it is
# injected by IPython's pylab mode -- confirm before running on a fresh kernel.
figsize(15, 5)

# Read the dataset from disk and preview its first five rows.
csv_path = 'crunchbase.csv'
df = pd.read_csv(csv_path)
df.head()
Out[28]:
In [29]:
# Keep only rows that have both a founding year and a category code.
required_columns = ['founded_year', 'category_code']
df = df.dropna(subset=required_columns, how='any')
In [30]:
# Restrict to companies founded from 2000 through 2013 (both bounds inclusive).
founded_in_range = df['founded_year'].between(2000, 2013)
df = df[founded_in_range]
In [31]:
# Discard rows whose region is the literal placeholder 'unknown'.
region_is_known = df['region'].ne('unknown')
df = df[region_is_known]

# Bar chart of the ten most frequent regions.
top_regions = df['region'].value_counts().head(10)
top_regions.plot(kind='bar')
Out[31]:
In [32]:
# Five regions with the most companies, counted by non-null `name` per region.
# `Series.order()` has been removed from pandas; `sort_values()` is its
# replacement and takes the same `ascending` argument.
df.groupby('region')['name'].count().sort_values(ascending=False).head(5)
Out[32]:
In [33]:
# Per-founding-year aggregates: count of non-null names and summed funding.
companies_by_year = df.groupby('founded_year')
num_companies = companies_by_year['name'].count()
total_funding = companies_by_year['funding_total_usd'].sum()
In [34]:
# Combine both yearly series into one frame so they share an x-axis;
# `total_funding` is requested on the secondary y-axis.
yearly_summary = pd.DataFrame({
    'num_companies': num_companies,
    'total_funding': total_funding,
})
yearly_summary.plot(secondary_y='total_funding')
Out[34]:
In [35]:
def _funding_by_year(region):
    """Return total `funding_total_usd` per `founded_year` for one region.

    Parameters
    ----------
    region : str
        Value matched exactly against the `region` column of `df`.

    Returns
    -------
    pandas.Series
        Summed funding indexed by founding year, restricted to `region`.
    """
    in_region = df[df['region'] == region]
    return in_region.groupby('founded_year')['funding_total_usd'].sum()


sf_funding = _funding_by_year('SF Bay')
bos_funding = _funding_by_year('Boston')
ny_funding = _funding_by_year('New York')

# Plot each region's share of the yearly funding total. Every line gets a
# label and a legend entry so the three curves can be told apart.
(sf_funding / total_funding).plot(label='SF Bay', legend=True)
(bos_funding / total_funding).plot(label='Boston', legend=True)
(ny_funding / total_funding).plot(label='New York', legend=True)
Out[35]:
In [35]: