In [29]:
import pandas as pd
startups = pd.read_csv('data/startups_1.csv', index_col=0)
startups[:3]
Out[29]:
In [30]:
#I'm considering only Acquisitions made in USA, with USD (dollars)
acquisitions = pd.read_csv('data/acquisitions.csv')
acquisitions = acquisitions[acquisitions['acquirer_country_code'] == 'USA']
acquisitions[:3]
Out[30]:
In [31]:
#acquirer_permalink
#rounds_agg = df_rounds.groupby(['company_permalink', 'funding_round_type'])['raised_amount_usd'].agg({'amount': [ pd.Series.sum, pd.Series.count]})
number_of_acquisitions = acquisitions.groupby(['acquirer_permalink'])['acquirer_permalink'].agg({'amount': [ pd.Series.count]}).reset_index()
number_of_acquisitions.columns = number_of_acquisitions.columns.droplevel()
number_of_acquisitions.columns = ['permalink', 'number_of_acquisitions']
number_of_acquisitions = number_of_acquisitions.set_index('permalink')
number_of_acquisitions[:3]
Out[31]:
In [32]:
investments = pd.read_csv('data/investments.csv')
investments = investments[investments['investor_country_code'] == 'USA']
investments[:3]
Out[32]:
In [33]:
#acquirer_permalink
#rounds_agg = df_rounds.groupby(['company_permalink', 'funding_round_type'])['raised_amount_usd'].agg({'amount': [ pd.Series.sum, pd.Series.count]})
number_of_investments = investments.groupby(['investor_permalink'])['investor_permalink'].agg({'amount': [ pd.Series.count]}).reset_index()
number_of_investments.columns = number_of_investments.columns.droplevel()
number_of_investments.columns = ['permalink', 'number_of_investments']
number_of_investments = number_of_investments.set_index('permalink')
number_of_investments[:3]
Out[33]:
In [34]:
#Number of different companies in which each company have invested in
number_of_unique_investments = investments.groupby(['investor_permalink'])['company_permalink'].agg({'amount': [ pd.Series.nunique]}).reset_index()
number_of_unique_investments.columns = number_of_unique_investments.columns.droplevel()
number_of_unique_investments.columns = ['permalink', 'number_of_unique_investments']
number_of_unique_investments = number_of_unique_investments.set_index('permalink')
number_of_unique_investments[:3]
Out[34]:
In [35]:
number_of_investors_per_round = investments.groupby(['company_permalink', 'funding_round_permalink'])['investor_permalink'].agg({'investor_permalink': [ pd.Series.count]}).reset_index()
number_of_investors_per_round.columns = number_of_investors_per_round.columns.droplevel(0)
number_of_investors_per_round.columns = ['company_permalink', 'funding_round_permalink', 'count']
number_of_investors_per_round = number_of_investors_per_round.groupby(['company_permalink']).agg({'count': [ pd.Series.mean]}).reset_index()
number_of_investors_per_round.columns = number_of_investors_per_round.columns.droplevel(0)
number_of_investors_per_round.columns = ['company_permalink', 'number_of_investors_per_round']
number_of_investors_per_round = number_of_investors_per_round.set_index('company_permalink')
number_of_investors_per_round[:3]
Out[35]:
In [36]:
from numpy import nanmean
#investments['raised_amount_usd'].dtype()
investments['raised_amount_usd'] = investments['raised_amount_usd'].astype(float)
avg_amount_invested_per_round = investments.groupby(['company_permalink', 'funding_round_permalink'])['raised_amount_usd'].agg({'raised_amount_usd': [ pd.Series.mean]}).reset_index()
avg_amount_invested_per_round.columns = avg_amount_invested_per_round.columns.droplevel(0)
avg_amount_invested_per_round.columns = ['company_permalink', 'funding_round_permalink', 'mean']
avg_amount_invested_per_round = avg_amount_invested_per_round.groupby(['company_permalink']).agg({'mean': [ pd.Series.mean]}).reset_index()
avg_amount_invested_per_round.columns = avg_amount_invested_per_round.columns.droplevel(0)
avg_amount_invested_per_round.columns = ['company_permalink', 'avg_amount_invested_per_round']
avg_amount_invested_per_round = avg_amount_invested_per_round.set_index('company_permalink')
avg_amount_invested_per_round = avg_amount_invested_per_round.fillna(0)
avg_amount_invested_per_round[:3]
Out[36]:
In [10]:
Out[10]:
In [37]:
startups = startups.join(number_of_acquisitions).join(number_of_investments).join(number_of_unique_investments).join(number_of_investors_per_round).join(avg_amount_invested_per_round)
In [48]:
startups[['number_of_acquisitions', 'number_of_investments', 'number_of_unique_investments','number_of_investors_per_round', 'avg_amount_invested_per_round']] = startups[['number_of_acquisitions', 'number_of_investments', 'number_of_unique_investments','number_of_investors_per_round', 'avg_amount_invested_per_round']].fillna(value=0)
In [49]:
startups[:3]
Out[49]:
In [50]:
startups.to_csv('data/startups_1_1.csv')