notebook.community

Edit and run



In [28]:

    
import numpy as np
import pandas as pd

# NOTE(review): `figsize` is not defined by the imports in this cell --
# presumably it is injected by IPython's pylab mode (`%pylab inline`).
# Confirm before relying on a fresh-kernel "Run All".
figsize(15, 5)

# Read the Crunchbase company export from the working directory.
df = pd.read_csv('crunchbase.csv')

# Preview the first five rows (five is the default for `head`).
df.head()









    Out[28]:






  
    
      
      permalink
      name
      homepage_url
      category_code
      funding_total_usd
      status
      country_code
      state_code
      region
      city
      funding_rounds
      founded_at
       founded_month 
      founded_quarter
      founded_year
      first_funding_at
      last_funding_at
      last_milestone_at
    
  
  
    
      0
        /company/canal-do-credito
         Canal do Credito
       http://www.canaldocredito.com.br
           finance
        750000
       operating
       BRA
       NaN
       Belo Horizonte
       Belo Horizonte
       1
          NaN
             NaN
           NaN
        NaN
        1/1/10
        1/1/10
            NaN
    
    
      1
                 /company/waywire
                 #waywire
                 http://www.waywire.com
              news
       1750000
        acquired
       USA
        NY
             New York
             New York
       1
       6/1/12
        2012-06 
       2012-Q2
       2012
       6/30/12
       6/30/12
       10/17/13
    
    
      2
       /company/tv-communications
       &TV Communications
                  http://enjoyandtv.com
       games_video
       4000000
       operating
       USA
        CA
          Los Angeles
          Los Angeles
       2
          NaN
             NaN
           NaN
        NaN
        6/4/10
       9/23/10
            NaN
    
    
      3
        /company/in-touch-network
        (In)Touch Network
          http://www.InTouchNetwork.com
         ecommerce
       1500000
       operating
       GBR
       NaN
               London
               London
       1
       4/1/11
        2011-04 
       2011-Q2
       2011
        4/1/11
        4/1/11
       11/15/13
    
    
      4
                 /company/n-plusn
                   #NAME?
                       http://plusn.com
          software
        600000
       operating
       USA
        NY
             New York
             New York
       1
       1/1/12
        2012-01 
       2012-Q1
       2012
       8/29/12
       8/29/12
            NaN



In [29]:

    
# Keep only the rows in which both the founding year and the category
# are present (a row missing either value is discarded).
df = df[df[['founded_year', 'category_code']].notnull().all(axis=1)]



In [30]:

    
# Restrict to companies founded from 2000 through 2013; `between` is
# inclusive on both ends.
df = df[df['founded_year'].between(2000, 2013)]



In [31]:

    
# Discard rows whose region is the literal placeholder 'unknown'.
df = df[df['region'].ne('unknown')]

# Bar chart of the ten most frequent regions among the remaining rows
# (`value_counts` sorts by descending frequency).
df['region'].value_counts().iloc[:10].plot(kind='bar')









    Out[31]:





<matplotlib.axes.AxesSubplot at 0x10d49ecd0>



In [32]:

    
# Top five regions by number of companies (count of non-null `name`
# values per region), largest first.
# `Series.order()` was removed from pandas; `sort_values()` is the
# replacement and yields the same ordering.
df.groupby('region')['name'].count().sort_values(ascending=False).head(5)









    Out[32]:





region
SF Bay         4247
New York       1642
Boston         1047
Los Angeles     966
London          857
dtype: int64



In [33]:

    
# Per founding year: how many companies have a non-null name ...
num_companies = df['name'].groupby(df['founded_year']).count()
# ... and the sum of their `funding_total_usd` values.
total_funding = df['funding_total_usd'].groupby(df['founded_year']).sum()



In [34]:

    
# Plot both yearly series on one chart; `total_funding` is drawn against
# a secondary (right-hand) y-axis because its scale differs from the
# company counts.
pd.DataFrame(
    dict(num_companies=num_companies, total_funding=total_funding)
).plot(secondary_y=['total_funding'])









    Out[34]:





<matplotlib.axes.AxesSubplot at 0x1086d2990>



In [35]:

    
def funding_by_year(frame, region):
    """Return the sum of `funding_total_usd` per `founded_year` for one region.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table with `region`, `founded_year` and `funding_total_usd` columns.
    region : str
        Value of the `region` column to select.

    Returns
    -------
    pandas.Series
        Summed funding indexed by founding year.
    """
    in_region = frame[frame['region'] == region]
    return in_region.groupby('founded_year')['funding_total_usd'].sum()


sf_funding = funding_by_year(df, 'SF Bay')
bos_funding = funding_by_year(df, 'Boston')
ny_funding = funding_by_year(df, 'New York')

# Each region's share of the yearly total across all regions.  The three
# lines are drawn on the same axes, so each one is labelled and the
# legend is enabled to keep them distinguishable.
(sf_funding / total_funding).plot(label='SF Bay', legend=True)
(bos_funding / total_funding).plot(label='Boston', legend=True)
(ny_funding / total_funding).plot(label='New York', legend=True)









    Out[35]:





<matplotlib.axes.AxesSubplot at 0x10b6eb310>



In [35]:

	permalink	name	homepage_url	category_code	funding_total_usd	status	country_code	state_code	region	city	funding_rounds	founded_at	founded_month	founded_quarter	founded_year	first_funding_at	last_funding_at	last_milestone_at
0	/company/canal-do-credito	Canal do Credito	http://www.canaldocredito.com.br	finance	750000	operating	BRA	NaN	Belo Horizonte	Belo Horizonte	1	NaN	NaN	NaN	NaN	1/1/10	1/1/10	NaN
1	/company/waywire	#waywire	http://www.waywire.com	news	1750000	acquired	USA	NY	New York	New York	1	6/1/12	2012-06	2012-Q2	2012	6/30/12	6/30/12	10/17/13
2	/company/tv-communications	&TV Communications	http://enjoyandtv.com	games_video	4000000	operating	USA	CA	Los Angeles	Los Angeles	2	NaN	NaN	NaN	NaN	6/4/10	9/23/10	NaN
3	/company/in-touch-network	(In)Touch Network	http://www.InTouchNetwork.com	ecommerce	1500000	operating	GBR	NaN	London	London	1	4/1/11	2011-04	2011-Q2	2011	4/1/11	4/1/11	11/15/13
4	/company/n-plusn	#NAME?	http://plusn.com	software	600000	operating	USA	NY	New York	New York	1	1/1/12	2012-01	2012-Q1	2012	8/29/12	8/29/12	NaN