In [128]:
nonloans['isInternational']=(nonloans.fromCountry!=nonloans.toCountry)&(nonloans.fromCountry.notnull())&(nonloans.toCountry.notnull())
nonloans['isSameCountry']=(nonloans.countryOfBirth==nonloans.fromCountry)
nonloans['isPaid']=(nonloans.feeValue>0)
nonloans['isIntlPlayer']=(nonloans.intlCaps>0)
nonloans['isYouth']=nonloans['age']<21
s=nonloans[(nonloans.age>20)&(nonloans.season>1995)].groupby('season').id.count()#.plot.bar(stacked=True)
z=nonloans[(nonloans.season>1995)].groupby(['season','isYouth']).id.count().unstack()
x=nonloans.merge(countryInfo.marketTier.reset_index().rename(columns={'Country':'fromCountry'}),how='left')
t=x[(x.age>=21)&(x.season>1995)&(x.fromCountry.notnull())].groupby(['season','positionGroup'])['duration'].mean().unstack()
t#.plot.line()
#z.plot.bar(stacked=True)
Out[128]:
In [129]:
nonloans.id.nunique()
Out[129]:
In [ ]:
In [ ]:
nonloans[(nonloans.age>20)&(nonloans.season>1995)].groupby(['fromCountry','season'])
In [92]:
countryInfo.marketTier.reset_index().rename(columns={'Country':'fromCountry'}) #.groupby('marketTier').count()
Out[92]:
In [ ]:
In [8]:
fig, axes = plt.subplots(nrows=3, ncols=4, figsize=(16, 16),sharex=False,sharey='row')
t=nonloans[(nonloans.fromCountry!=nonloans.toCountry)].rename(columns={'toCountry':'dest. country'}).groupby(['period','dest. country'])['feeValue'].count()
t['(1995, 2000]'].sort_values(ascending=False)[:20].plot.bar(ax=axes[0,0],title='# of intl transfers, 1996-2000')
t['(2000, 2005]'].sort_values(ascending=False)[:20].plot.bar(ax=axes[0,1],title='# of intl transfers,2001-2005')
t['(2005, 2010]'].sort_values(ascending=False)[:20].plot.bar(ax=axes[0,2],title='# of intl transfers,2006-2010')
t['(2010, 2015]'].sort_values(ascending=False)[:20].plot.bar(ax=axes[0,3],title='# of intl transfers,2011-2015')
s=nonloans[(nonloans.fromCountry!=nonloans.toCountry)].rename(columns={'toCountry':'dest. country'}).groupby(['period','dest. country'])['feeValue'].sum()
s['(1995, 2000]'].sort_values(ascending=False)[:20].plot.bar(ax=axes[1,0],title='total intl fees, 1996-2000')
s['(2000, 2005]'].sort_values(ascending=False)[:20].plot.bar(ax=axes[1,1],title='total intl fees, 2001-2005')
s['(2005, 2010]'].sort_values(ascending=False)[:20].plot.bar(ax=axes[1,2],title='total intl fees, 2006-2010')
s['(2010, 2015]'].sort_values(ascending=False)[:20].plot.bar(ax=axes[1,3],title='total intl fees, 2011-2015')
r=nonloans[(nonloans.fromCountry!=nonloans.toCountry)].rename(columns={'toCountry':'dest. country'}).groupby(['period','dest. country'])['feeValue'].mean()
r['(1995, 2000]'].sort_values(ascending=False)[:20].plot.bar(ax=axes[2,0],title='avg intl fees, 1996-2000')
r['(2000, 2005]'].sort_values(ascending=False)[:20].plot.bar(ax=axes[2,1],title='avg intl fees, 2001-2005')
r['(2005, 2010]'].sort_values(ascending=False)[:20].plot.bar(ax=axes[2,2],title='avg intl fees, 2006-2010')
r['(2010, 2015]'].sort_values(ascending=False)[:20].plot.bar(ax=axes[2,3],title='avg intl fees, 2011-2015')
plt.tight_layout()
In [7]:
fig, axes = plt.subplots(nrows=3, ncols=4, figsize=(16, 16),sharex=False,sharey='row')
t=nonloans[(nonloans.fromCountry!=nonloans.toCountry)&(nonloans.toCountry.notnull())].rename(columns={'toTeamId':'dest. club'}).groupby(['period','dest. club'])['feeValue'].count().rename(index=teamIdMap)
t['(1995, 2000]'].sort_values(ascending=False)[:20].plot.bar(ax=axes[0,0],title='# of intl transfers, 1996-2000')
t['(2000, 2005]'].sort_values(ascending=False)[:20].plot.bar(ax=axes[0,1],title='# of intl transfers,2001-2005')
t['(2005, 2010]'].sort_values(ascending=False)[:20].plot.bar(ax=axes[0,2],title='# of intl transfers,2006-2010')
t['(2010, 2015]'].sort_values(ascending=False)[:20].plot.bar(ax=axes[0,3],title='# of intl transfers,2011-2015')
s=nonloans[(nonloans.fromCountry!=nonloans.toCountry)&(nonloans.toCountry.notnull())].rename(columns={'toTeamId':'dest. club'}).groupby(['period','dest. club'])['feeValue'].sum().rename(index=teamIdMap)
s['(1995, 2000]'].sort_values(ascending=False)[:20].plot.bar(ax=axes[1,0],title='total intl fees, 1996-2000')
s['(2000, 2005]'].sort_values(ascending=False)[:20].plot.bar(ax=axes[1,1],title='total intl fees, 2001-2005')
s['(2005, 2010]'].sort_values(ascending=False)[:20].plot.bar(ax=axes[1,2],title='total intl fees, 2006-2010')
s['(2010, 2015]'].sort_values(ascending=False)[:20].plot.bar(ax=axes[1,3],title='total intl fees, 2011-2015')
r=nonloans[(nonloans.fromCountry!=nonloans.toCountry)&(nonloans.toCountry.notnull())].rename(columns={'toTeamId':'dest. club'}).groupby(['period','dest. club'])['feeValue'].mean().rename(index=teamIdMap)
r['(1995, 2000]'].sort_values(ascending=False)[:20].plot.bar(ax=axes[2,0],title='avg intl fees, 1996-2000')
r['(2000, 2005]'].sort_values(ascending=False)[:20].plot.bar(ax=axes[2,1],title='avg intl fees, 2001-2005')
r['(2005, 2010]'].sort_values(ascending=False)[:20].plot.bar(ax=axes[2,2],title='avg intl fees, 2006-2010')
r['(2010, 2015]'].sort_values(ascending=False)[:20].plot.bar(ax=axes[2,3],title='avg intl fees, 2011-2015')
plt.tight_layout()
In [162]:
fig, axes = plt.subplots(nrows=1, ncols=4, figsize=(18, 4),sharex=True,sharey='row')
country_coeff.join(natteam_coeff).plot.scatter('2012 coeff','National team coeff',ax=axes[0],title='2012 coefficient')
country_coeff.join(natteam_coeff).plot.scatter('2013 coeff','National team coeff',ax=axes[1],title='2013 coefficient')
country_coeff.join(natteam_coeff).plot.scatter('2014 coeff','National team coeff',ax=axes[2],title='2014 coefficient')
country_coeff.join(natteam_coeff).plot.scatter('2015 coeff','National team coeff',ax=axes[3],title='2015 coefficient')
Out[162]:
In [1]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
matplotlib.style.use('ggplot')
from urllib2 import urlopen, Request
from bs4 import BeautifulSoup
from pandas import DataFrame
import pandas as pd
import numpy as np
import re
import os.path
import datetime
import math
import matplotlib.pyplot as plt
transfers = pd.read_csv('../data/merged/transfers_q.csv')
loans = pd.read_csv('../data/merged/loans.csv')
nonloans = pd.read_csv('../data/merged/nonloans.csv')
players=pd.read_csv('../data/merged/players.csv')
leagueCodeMapping = {'GB1':'Premier League (England)','FR1':'Ligue 1 (France)',
'ES1':'La Liga (Spain)','IT1':'Serie A (Italy)','L1':'Bundesliga (Germany)',
'PO1':'Primeira Liga (Portugal)','NL1':'Eredivisie (Netherlands)'}
teamIdMap = pd.concat([transfers[['toTeamId','toTeamName','season']].rename(columns={'toTeamId':'teamId','toTeamName':'teamName'}),
transfers[['fromTeamId','fromTeamName','season']].rename(columns={'fromTeamId':'teamId','fromTeamName':'teamName'})],ignore_index=True) \
.sort_values('season',ascending=False).drop_duplicates('teamId').set_index('teamId')['teamName'].to_dict()
agentIdMap = transfers[['playersAgentId','playersAgent']].sort_values('playersAgent',ascending=False).drop_duplicates('playersAgentId').set_index('playersAgentId')['playersAgent'].to_dict()
leagueList = ['GB1','ES1','L1','IT1','FR1','PO1','NL1','TR1','RU1','BE1','MLS1','UKR1','GR1','RO1','ZYP1','KR1','SER1','BOS1','BU1','DK1','SE1','NO1','LI1','SC1','ISR1','PL1','UNG1','A1','C1','MAL1','KAS1','TS1','SLO1','SL1','FI1','LUX1','AZ1','IR1']
countryInfo=pd.read_excel('../data/reference/countries.xls').set_index('country')
countryInfo.marketTier=np.where(countryInfo.marketTier==4,3,countryInfo.marketTier)
top20clubids=transfers.groupby('toTeamId')[['feeValue']].sum().merge(DataFrame({'teamName':teamIdMap}), left_index=True,right_index=True,how='left').sort_values('feeValue',ascending=False).iloc[:20].index
top50clubids=transfers.groupby('toTeamId')[['feeValue']].sum().merge(DataFrame({'teamName':teamIdMap}), left_index=True,right_index=True,how='left').sort_values('feeValue',ascending=False).iloc[:50].index
top100clubids=transfers.groupby('toTeamId')[['feeValue']].sum().merge(DataFrame({'teamName':teamIdMap}), left_index=True,right_index=True,how='left').sort_values('feeValue',ascending=False).iloc[:100].index
uefa=countryInfo[countryInfo.federation=='UEFA'].index
caf=countryInfo[countryInfo.federation=='CAF'].index
concacaf=countryInfo[countryInfo.federation=='CONCACAF'].index
concacaf_no_uscan=countryInfo[countryInfo.federation=='CONCACAF'].drop(['United States','Canada']).index
conmebol=countryInfo[countryInfo.federation=='CONMEBOL'].index
afc=countryInfo[countryInfo.federation=='AFC'].index
afc_no_anzac=countryInfo[countryInfo.federation=='AFC'].drop(['Australia','New Zealand']).index
country_coeff=pd.read_table('../data/reference/country_coefficient.csv').reset_index(drop=True)
country_coeff.columns=['Movement','Country','2012 coeff','2013 coeff','2014 coeff','2015 coeff','2016 coeff','Total coeff','Teams Total','CL','EL','Total']
country_coeff=country_coeff.drop(['Movement','2016 coeff','CL','EL','Total','Teams Total','Total coeff'],axis=1)
country_coeff['Country']=country_coeff['Country'].str.split('\(L',expand=True)[0].str.strip()
country_coeff=country_coeff.set_index('Country')
country_coeff = country_coeff.join(DataFrame({'total coeff':country_coeff.sum(axis=1)}))
natteam_coeff=pd.read_table('../data/reference/national_team_coefficient.csv').reset_index(drop=True).drop('Rank',axis=1)
natteam_coeff['Country']=natteam_coeff['Country'].str.strip()
natteam_coeff.Coefficient=natteam_coeff.Coefficient.apply(lambda s: int(s.replace(',','')))
natteam_coeff=natteam_coeff.rename(columns={'Coefficient':'National team coeff'}).set_index('Country')
club_coeff=pd.read_table('../data/reference/club_coefficient.csv').reset_index(drop=True).set_index('Club')
club_coeff=club_coeff.drop(['2017','2016','Mvmt','Total'],axis=1)
club_coeff.Association = club_coeff.Association.str.split(expand=True)[0]
# sanity check: club_coeff.merge(DataFrame({'Team Name':teamIdMap}),how='left',left_on='Team Id',right_index=True)
club_coeff=club_coeff.reset_index().set_index('Team Id').rename(columns=lambda c: 'Club coeff ' + c.split('–')[0] if '–' in c else c)
gdp=pd.read_table('../data/reference/country_gdp.csv').reset_index(drop=True).drop('2014 Rank',axis=1).rename(columns={'Country ':'Country'})
gdp.Country=gdp.Country.str.strip()
gdp=gdp.set_index('Country')
gdp.columns=['GDP '+c for c in gdp.columns.str.strip()]
for column in gdp.columns:
gdp[column]=gdp[column].apply(lambda s: float(str(s).replace(',','')))
unemployed=pd.read_table('../data/reference/country_unemployment.csv').drop(['Date','Employment rate'],axis=1)
unemployed.columns=['Country','Unemployment']
unemployed['Country']=unemployed['Country'].str.split(' ').apply(lambda arr: ' '.join(arr[:len(arr)/2]))
unemployed=unemployed.set_index('Country').rename(index={'United Kingdom':'England'})
corruption=pd.read_table('../data/reference/country_corruption.csv').drop(['Rank'],axis=1)
corruption.Country=corruption.Country.str.strip()
corruption=corruption.set_index('Country')
corruption=corruption[corruption.columns[:1]].rename(columns={'2015 score':'Corruption'}).rename(index={'United Kingdom':'England'})
population=pd.read_table('../data/reference/country_population.csv').drop(['Rank'],axis=1)
population.columns=population.columns.str.strip()
population.Country=population.Country.str.split('[',expand=True)[0].str.strip()
population['Population']=population['Population'].apply(lambda s: float(str(s).replace(',','')))
population=population[['Country','Population','Growth %']].set_index('Country').rename(index={'United Kingdom':'England'})
barometer=pd.read_table('../data/reference/country_corruption_barometer.csv').reset_index(drop=True)
barometer.columns=['Corruption Rank','Country','Bribe %']
barometer.Country=barometer.Country.str.strip()
barometer=barometer.set_index('Country').rename(index={'United Kingdom':'England'})
countryInfo=natteam_coeff.join(country_coeff).join(gdp).join(unemployed).join(corruption).join(population).join(countryInfo).join(barometer)
In [111]:
t=nonloans[(nonloans.season>2000)&(nonloans.fromCountry!=nonloans.toCountry)]
s=nonloans[(nonloans.season>2000)&(nonloans.age>21)]
meanFeeImportPeriod = t.groupby(['toCountry','period']).feeValue.mean().unstack().rename(columns=lambda col:'avg import fee '+col)
totalFeeImportPeriod = t.groupby(['toCountry','period']).feeValue.sum().unstack().rename(columns=lambda col:'total import fees '+col)
meanAgeImportPeriod = t.groupby(['toCountry','period']).age.mean().unstack().rename(columns=lambda col:'avg import age '+col)
paidImportPeriod = t[t.feeValue>0].groupby(['toCountry','period']).id.count().unstack().rename(columns=lambda col:'# of paid imports '+col)
freeImportPeriod = t[t.feeValue==0].groupby(['toCountry','period']).id.count().unstack().rename(columns=lambda col:'# of free imports '+col)
totalImportPeriod = t.groupby(['toCountry','period']).id.count().unstack().rename(columns=lambda col:'# of imports '+col)
meanFeeIExportPeriod = t.groupby(['fromCountry','period']).feeValue.mean().unstack().rename(columns=lambda col:'avg export fee '+col)
totalFeeExportPeriod = t.groupby(['fromCountry','period']).feeValue.sum().unstack().rename(columns=lambda col:'total export fees '+col)
meanAgeExportPeriod = t.groupby(['fromCountry','period']).age.mean().unstack().rename(columns=lambda col:'avg export age '+col)
paidExportPeriod = t[t.feeValue>0].groupby(['fromCountry','period']).id.count().unstack().rename(columns=lambda col:'# of paid exports '+col)
freeExportPeriod = t[t.feeValue==0].groupby(['fromCountry','period']).id.count().unstack().rename(columns=lambda col:'# of free exports '+col)
totalExportPeriod = t.groupby(['fromCountry','period']).id.count().unstack().rename(columns=lambda col:'# of exports '+col)
meanDurationPeriod = s.groupby(['fromCountry','period']).duration.mean().unstack().rename(columns=lambda col:'avg duration '+col)
volumePeriod = s.groupby(['fromCountry','period']).id.count().unstack().rename(columns=lambda col:'21-plus transfer volume '+col)
meanFeeImportAll = t.groupby(['toCountry'])[['feeValue']].mean().rename(columns=lambda col:'avg import fee overall')
totalFeeImportAll = t.groupby(['toCountry'])[['feeValue']].sum().rename(columns=lambda col:'total import fees overall')
meanAgeImportAll = t.groupby(['toCountry'])[['age']].mean().rename(columns=lambda col:'avg import age overall')
paidImportAll = t[t.feeValue>0].groupby(['toCountry'])[['id']].count().rename(columns=lambda col:'# of paid imports overall')
freeImportAll = t[t.feeValue==0].groupby(['toCountry'])[['id']].count().rename(columns=lambda col:'# of free imports overall')
totalImportAll = t.groupby(['toCountry'])[['id']].count().rename(columns=lambda col:'# of imports overall')
meanFeeExportAll = t.groupby(['fromCountry'])[['feeValue']].mean().rename(columns=lambda col:'avg export fee overall')
totalFeeExportAll = t.groupby(['fromCountry'])[['feeValue']].sum().rename(columns=lambda col:'total export fees overall')
meanAgeExportAll = t.groupby(['fromCountry'])[['age']].mean().rename(columns=lambda col:'avg export age overall')
paidExportAll = t[t.feeValue>0].groupby(['fromCountry'])[['id']].count().rename(columns=lambda col:'# of paid exports overall')
freeExportAll= t[t.feeValue==0].groupby(['fromCountry'])[['id']].count().rename(columns=lambda col:'# of free exports overall')
totalExportAll = t.groupby(['fromCountry'])[['id']].count().rename(columns=lambda col:'# of exports overall')
meanDurationAll = s.groupby(['fromCountry'])[['duration']].mean().rename(columns=lambda col:'avg duration overall')
volumeAll = s.groupby(['fromCountry'])[['id']].count().rename(columns=lambda col:'21-plus transfer volume overall')
fulljoin=countryInfo.join(meanFeeImportPeriod).join(totalFeeImportPeriod).join(meanAgeImportPeriod).join(paidImportPeriod).join(freeImportPeriod)\
.join(totalImportPeriod).join(meanFeeIExportPeriod).join(totalFeeExportPeriod).join(meanAgeExportPeriod).join(paidExportPeriod)\
.join(freeExportPeriod).join(totalExportPeriod).join(meanDurationPeriod).join(volumePeriod)\
.join(meanFeeImportAll).join(totalFeeImportAll).join(meanAgeImportAll).join(paidImportAll).join(freeImportAll)\
.join(totalImportAll).join(meanFeeExportAll).join(totalFeeExportAll).join(meanAgeExportAll).join(paidExportAll)\
.join(freeExportAll).join(totalExportAll).join(meanDurationAll).join(volumeAll)
fulljoin.to_excel('../data/output/country_summary.xlsx')
fulljoin
Out[111]:
In [62]:
t=nonloans[(nonloans.season>2000)&(nonloans.fromCountry!=nonloans.toCountry)]
s=nonloans[(nonloans.season>2000)&(nonloans.age>21)]
meanFeeImportPeriod = t.groupby(['toTeamId','period']).feeValue.mean().unstack().rename(columns=lambda col:'avg import fee '+col)
totalFeeImportPeriod = t.groupby(['toTeamId','period']).feeValue.sum().unstack().rename(columns=lambda col:'total import fees '+col)
meanAgeImportPeriod = t.groupby(['toTeamId','period']).age.mean().unstack().rename(columns=lambda col:'avg import age '+col)
paidImportPeriod = t[t.feeValue>0].groupby(['toTeamId','period']).id.count().unstack().rename(columns=lambda col:'# of paid imports '+col)
freeImportPeriod = t[t.feeValue==0].groupby(['toTeamId','period']).id.count().unstack().rename(columns=lambda col:'# of free imports '+col)
totalImportPeriod = t.groupby(['toTeamId','period']).id.count().unstack().rename(columns=lambda col:'# of imports '+col)
meanFeeIExportPeriod = t.groupby(['fromTeamId','period']).feeValue.mean().unstack().rename(columns=lambda col:'avg export fee '+col)
totalFeeExportPeriod = t.groupby(['fromTeamId','period']).feeValue.sum().unstack().rename(columns=lambda col:'total export fees '+col)
meanAgeExportPeriod = t.groupby(['fromTeamId','period']).age.mean().unstack().rename(columns=lambda col:'avg export age '+col)
paidExportPeriod = t[t.feeValue>0].groupby(['fromTeamId','period']).id.count().unstack().rename(columns=lambda col:'# of paid exports '+col)
freeExportPeriod = t[t.feeValue==0].groupby(['fromTeamId','period']).id.count().unstack().rename(columns=lambda col:'# of free exports '+col)
totalExportPeriod = t.groupby(['fromTeamId','period']).id.count().unstack().rename(columns=lambda col:'# of exports '+col)
meanDurationPeriod = s.groupby(['fromTeamId','period']).duration.mean().unstack().rename(columns=lambda col:'avg duration '+col)
meanFeeImportAll = t.groupby(['toTeamId'])[['feeValue']].mean().rename(columns=lambda col:'avg import fee overall')
totalFeeImportAll = t.groupby(['toTeamId'])[['feeValue']].sum().rename(columns=lambda col:'total import fees overall')
meanAgeImportAll = t.groupby(['toTeamId'])[['age']].mean().rename(columns=lambda col:'avg import age overall')
paidImportAll = t[t.feeValue>0].groupby(['toTeamId'])[['id']].count().rename(columns=lambda col:'# of paid imports overall')
freeImportAll = t[t.feeValue==0].groupby(['toTeamId'])[['id']].count().rename(columns=lambda col:'# of free imports overall')
totalImportAll = t.groupby(['toTeamId'])[['id']].count().rename(columns=lambda col:'# of imports overall')
meanFeeExportAll = t.groupby(['fromTeamId'])[['feeValue']].mean().rename(columns=lambda col:'avg export fee overall')
totalFeeExportAll = t.groupby(['fromTeamId'])[['feeValue']].sum().rename(columns=lambda col:'total export fees overall')
meanAgeExportAll = t.groupby(['fromTeamId'])[['age']].mean().rename(columns=lambda col:'avg export age overall')
paidExportAll = t[t.feeValue>0].groupby(['fromTeamId'])[['id']].count().rename(columns=lambda col:'# of paid exports overall')
freeExportAll= t[t.feeValue==0].groupby(['fromTeamId'])[['id']].count().rename(columns=lambda col:'# of free exports overall')
totalExportAll = t.groupby(['fromTeamId'])[['id']].count().rename(columns=lambda col:'# of exports overall')
meanDurationAll = s.groupby(['fromTeamId'])[['duration']].mean().rename(columns=lambda col:'avg duration overall')
clubjoin=club_coeff.join(meanFeeImportPeriod).join(totalFeeImportPeriod).join(meanAgeImportPeriod).join(paidImportPeriod).join(freeImportPeriod)\
.join(totalImportPeriod).join(meanFeeIExportPeriod).join(totalFeeExportPeriod).join(meanAgeExportPeriod).join(paidExportPeriod)\
.join(freeExportPeriod).join(totalExportPeriod).join(meanDurationPeriod)\
.join(meanFeeImportAll).join(totalFeeImportAll).join(meanAgeImportAll).join(paidImportAll).join(freeImportAll)\
.join(totalImportAll).join(meanFeeIExportAll).join(totalFeeExportAll).join(meanAgeExportAll).join(paidExportAll)\
.join(freeExportAll).join(totalExportAll).join(meanDurationAll)
ew = pd.ExcelWriter('../data/output/top_club_summary.xls', encoding='utf-8')
j=clubjoin.merge(fulljoin,how='left',left_on='Association',right_index=True,suffixes=('',', country'))
j.to_excel(ew)
ew.save()
In [2]:
%ls ../data/reference/GCB2013_DataPack
In [18]:
bribes=pd.read_excel('../data/reference/GCB2013_DataPack/GCB2013_Data.xls',sheetname=7,skiprows=3)
bribes=bribes[['Country']+[bribes.columns[-1]]][1:].set_index('Country')
bribes.ix['Germany']
Out[18]:
In [134]:
t=countryInfo.join(nonloans[(nonloans.toCountry!=nonloans.fromCountry)|(nonloans.age>21)].groupby(['toCountry','season'])['id'].count().unstack()).groupby('marketTier').sum().T[-20:] #.plot.line()
t[3]=t[3]+t[4]
t=t.drop(4,axis=1)
s=t.sum(axis=1)
t[1]=t[1]/s
t[2]=t[2]/s
t[3]=t[3]/s
t.plot.area(stacked=True,figsize=(10,10))
Out[134]:
In [109]:
countryInfo['Corruption Level']=countryInfo['Corruption'].apply(lambda x: math.floor(x/20))
countryInfo['Corruption Level']=countryInfo['Corruption Rank'].fillna(1).apply(lambda x: math.floor(x/10))
t=countryInfo.join(nonloans.groupby(['toCountry','season'])['id'].count().unstack())
t=t[t['marketTier']==2].groupby('Corruption Level').sum().T[-20:] #.plot.line()
s=t.sum(axis=1)
for c in t.columns:
t[c]=t[c]/s
t.plot.line(stacked=False,figsize=(10,10))
Out[109]:
In [121]:
countryInfo[countryInfo.marketTier>=3].count()
Out[121]:
In [132]:
nonloans[(nonloans.feeValue>0)&(nonloans.fromCountry!=nonloans.toCountry)].groupby('season').feeValue.sum().plot.line()
#nonloans[nonloans.fromCountry!=nonloans.toCountry].groupby('season').feeValue.mean().plot.line()
Out[132]:
In [135]:
countryInfo['marketTier'].sort_values()
Out[135]: