In [28]:
import numpy as np
import pandas as pd

# Default figure size (width, height) for the plots drawn below.
# NOTE(review): `figsize` is not imported in this cell; presumably it is
# supplied by a pylab-style session (e.g. `%pylab inline`) -- confirm.
figsize(15, 5)

# Load the company table from the CSV next to the notebook.
df = pd.read_csv('crunchbase.csv')

# Preview the first rows (head() returns 5 rows by default).
df.head()


Out[28]:
permalink name homepage_url category_code funding_total_usd status country_code state_code region city funding_rounds founded_at founded_month founded_quarter founded_year first_funding_at last_funding_at last_milestone_at
0 /company/canal-do-credito Canal do Credito http://www.canaldocredito.com.br finance 750000 operating BRA NaN Belo Horizonte Belo Horizonte 1 NaN NaN NaN NaN 1/1/10 1/1/10 NaN
1 /company/waywire #waywire http://www.waywire.com news 1750000 acquired USA NY New York New York 1 6/1/12 2012-06 2012-Q2 2012 6/30/12 6/30/12 10/17/13
2 /company/tv-communications &TV Communications http://enjoyandtv.com games_video 4000000 operating USA CA Los Angeles Los Angeles 2 NaN NaN NaN NaN 6/4/10 9/23/10 NaN
3 /company/in-touch-network (In)Touch Network http://www.InTouchNetwork.com ecommerce 1500000 operating GBR NaN London London 1 4/1/11 2011-04 2011-Q2 2011 4/1/11 4/1/11 11/15/13
4 /company/n-plusn #NAME? http://plusn.com software 600000 operating USA NY New York New York 1 1/1/12 2012-01 2012-Q1 2012 8/29/12 8/29/12 NaN

In [29]:
# Keep only rows that have both a founding year and a category.
required_columns = ['founded_year', 'category_code']
df = df.dropna(subset=required_columns)

In [30]:
# Restrict to companies founded from 2000 through 2013 (both ends inclusive).
founded_in_range = df['founded_year'].between(2000, 2013)
df = df[founded_in_range]

In [31]:
# Discard rows whose region is the literal placeholder 'unknown'.
has_known_region = df['region'] != 'unknown'
df = df[has_known_region]

# Bar chart of the ten most frequent regions.
top_regions = df['region'].value_counts().head(10)
top_regions.plot(kind='bar')


Out[31]:
<matplotlib.axes.AxesSubplot at 0x10d49ecd0>

In [32]:
# Five regions with the most companies (non-null `name` entries per region).
# `Series.order()` was removed from pandas; `sort_values()` is its replacement.
df.groupby('region')['name'].count().sort_values(ascending=False).head(5)


Out[32]:
region
SF Bay         4247
New York       1642
Boston         1047
Los Angeles     966
London          857
dtype: int64

In [33]:
# Group once by founding year and derive both per-year aggregates from it.
by_founded_year = df.groupby('founded_year')

# Count of non-null company names per founding year.
num_companies = by_founded_year['name'].count()
# Sum of `funding_total_usd` per founding year.
total_funding = by_founded_year['funding_total_usd'].sum()

In [34]:
# Combine both yearly series into one frame (indexed by founding year).
yearly_summary = pd.DataFrame({
    'num_companies': num_companies,
    'total_funding': total_funding,
})

# Funding is drawn against a secondary y-axis; company counts use the primary one.
yearly_summary.plot(secondary_y='total_funding')


Out[34]:
<matplotlib.axes.AxesSubplot at 0x1086d2990>

In [35]:
def funding_by_year(companies, region):
    """Sum `funding_total_usd` per `founded_year` for rows in one region.

    Parameters
    ----------
    companies : DataFrame with `region`, `founded_year` and
        `funding_total_usd` columns.
    region : value compared for equality against the `region` column.

    Returns
    -------
    Series indexed by `founded_year`.
    """
    in_region = companies[companies['region'] == region]
    return in_region.groupby('founded_year')['funding_total_usd'].sum()


# Per-region yearly funding; the original variable names are kept.
sf_funding = funding_by_year(df, 'SF Bay')
bos_funding = funding_by_year(df, 'Boston')
ny_funding = funding_by_year(df, 'New York')

# Each region's share of the all-region total for the same founding year.
# Collecting the shares in one frame gives every line a label, so the plot
# gets a legend instead of three indistinguishable curves.
funding_share = pd.DataFrame(
    {
        'SF Bay': sf_funding / total_funding,
        'Boston': bos_funding / total_funding,
        'New York': ny_funding / total_funding,
    },
    columns=['SF Bay', 'Boston', 'New York'],
)

funding_share.plot(title='Share of total funding by founding year')


Out[35]:
<matplotlib.axes.AxesSubplot at 0x10b6eb310>

In [35]: