In [3]:
# utility functions for accessing transfermarkt.com

from urllib2 import urlopen, Request
from bs4 import BeautifulSoup
from pandas import DataFrame
import pandas as pd
import numpy as np
import re
import os.path 
import datetime 

tier1=["GB1", "ES1", "IT1", "L1", "FR1", "TR1", "RU1", "PO1", "NL1", "BE1", "MLS1", 
            "UKR1", "GR1", "C1", "RO1", "KR1", "A1", "TS1", "DK1", "PL1", "SE1", "SER1", "NO1", "ISR1", "SC1", "BU1", "ZYP1"]
tier2=["GB2", "ES2", "IT2", "L2", "FR2", "TR2", "RU2", "PO2", "NL2", "BE2", "NAFC"]
tier3=["GB3", "L3", "IT3A", "IT3B", "IT3C"]
youthLeagues=["GB21", "AJ1", "AJ2", "AJ3", "ITJ1", "ITJ2", "ITJ3"]
otherEurope = ["KAS1", "BOS1", "UNG1", "WER1", "SL1", "SLO1", "GEO1", "AZ1", "ALB1", "MOL1", 
                           "FI1", "MAZ1", "MNE1", "LI1", "ARM1", "EST1", "LUX1", "LET1", "MAL1", "IS1", "IR1", "WAL1","NIR1", "FARO"]
world=["CSL", "JL11", "UAE1", "QSL", "RSK1", "IRN1", "SA1", "AUS1", "THA1", "NZL1", "HGKG", "LIB1", "IND1", "SIN1", "INIL", "PHI1",
             "EGY1", "SFA1", "ALG1", "TUN1", "MAR1", "GHPL"]
americas=["BRA1", "AR1N", "MEX1", "COLP", "ECPE", "CHL1", "URU1", "PERC", "PDV1", "VZ1A", "BRA2"]
defunct=["EFD2","ENSD","GBJG"]
deep=["RLW3","RLSW","RLN3","RLN4","RLB3","ES3A","ES3B","ES3C","ES3D","510","511",
            "TR3A","TR3B","TR4A","TR4B","TR4C", "GB4","CNAT", "NLAJ", "NLAE"]

# transfermarkt blocks default useragent
useragent = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'
# "lol" can be anything - transfermarkt ignores it/uses it for semantic url
urlprefix = 'http://www.transfermarkt.com/lol/'

def getUrlByLeagueId(id, season=2015):
    return urlprefix + 'startseite/wettbewerb/' + id + '/saison_id/' + str(season)

def getUrlByClubId(id, season=2015):
    return urlprefix + 'startseite/verein/' + str(id) + '/saison_id/' + str(season)

def getTableUrlByLeagueId(id, season=2015):
    return urlprefix + 'tabelle/wettbewerb/' + id + '/saison_id/' + str(season)

def getUrlByPlayerId(id):
    return urlprefix + 'profil/spieler/' + str(id)

def getUrlByAgentId(id):
    return urlprefix + 'beraterfirma/berater/' + str(id)

def getBsByPlayerId(id):
        return BeautifulSoup(urlopen(Request(getUrlByPlayerId(id), headers={'User-Agent': useragent})))

def getClubsByLeagueId(id, season=2015):
    bs = BeautifulSoup(urlopen(Request(getUrlByLeagueId(id, season), headers={'User-Agent': useragent})))
    elements = bs.find(id='yw1').find_all("td",class_="hauptlink no-border-links hide-for-small hide-for-pad")
    return [{'clubId': e.find("a")["id"], 'name': e.getText().strip(u'\xa0 ')} for e in elements]

def getPlayersByClubId(id, season=2015):
    bs = BeautifulSoup(urlopen(Request(getUrlByClubId(id, season), headers={'User-Agent': useragent})))
    elements = bs.find(id='yw1').find_all("span",class_="hide-for-small")
    return [{'playerId': e.find("a", class_="spielprofil_tooltip")["id"], 
             'name': e.getText()} for e in elements if e.find("a", class_="spielprofil_tooltip")]

def getTransfersFromBs(bs):
    elements = bs.find(class_="transferhistorie").find_all("tr",class_="zeile-transfer")
    dicts = [{'seasonDate': "  ".join([td.getText() for td in e.findAll("td")[:2]]),
      'mv': e.find("td",class_="zelle-mw").getText(),
      'fee': e.find("td",class_="zelle-abloese").getText(),
      'countries': dict(zip(['from','to'],([country.find("img").get("title")
                                    for country in e.find_all("td", class_="no-border-rechts no-border-links flagge hide-for-small") if country.find("img")]))),
      'teams': dict(zip(['from','to'],([{'teamId': team.find("a")["id"], 'name': team.getText()}
                                    for team in e.find_all("td", class_="hauptlink no-border-links hide-for-small vereinsname")])))} for e in elements]
    return [{'season': d['seasonDate'].split("  ")[0],
       'date': d['seasonDate'].split("  ")[1],
       'mv': d['mv'], 'fee': d['fee'],
       'fromTeamId': d['teams']['from']['teamId'],
       'fromTeamName': d['teams']['from']['name'].lstrip(),
       'toTeamId': d['teams']['to']['teamId'],
       'toTeamName': d['teams']['to']['name'].lstrip(),
       'fromCountry': d['countries'].get('from'),
       'toCountry': d['countries'].get('to'),
        'playerName': bs.find("h1").getText()} for d in dicts]

def getPlayerDataFromBs(bs):
    elements = bs.find(class_="spielerdaten").find_all("tr") 
    result1 = [{'key': e.find("th").getText().strip().rstrip(':'),'value': e.find("td").getText().strip(), 
            'country': (e.find('img',class_="flaggenrahmen").get("title")) if e.find('img',class_="flaggenrahmen") else "",
            'hrefs' : [a.get("href") for a in e.findAll("a")]} for e in elements]

    result2 = [{'key':e['key'], 'value': (e['value'] 
           + (" COUNTRY:" + e['country'] if e['key'] == 'Place of birth' else '')
           + (" HREFs: " + " ".join(e['hrefs']) if len(e['hrefs']) > 0 else '')
           + ("SOCMED: " + "####".join(e['hrefs']) if len(e['hrefs']) > 0 else '')).strip()
          } for e in result1]

    result3 = dict([(e['key'],e['value']) for e in result2])
    
    result3["Display name"] = bs.find("h1").getText()
    print "now processing " + result3["Display name"].encode('utf-8')
    if bs.find("div",class_="detailpositionen"):
        result3["Detailed position"] = re.sub('\s+', ' ',bs.find("div",class_="detailpositionen").getText().strip())
    if bs.find("span",itemprop="birthDate"):
        result3["birthDate"] = re.sub('\s+', ' ',bs.find("span",itemprop="birthDate").getText().strip())
    if "Date of birth" in result3:
        result3["Date of birth"] = result3["Date of birth"].split("HREFs:")[0].strip()
    if "Current club" in result3:
        result3["Current club id"] = result3["Current club"].split("/verein/")[-1]
        result3["Current club"] = result3["Current club"].split("HREFs:")[0].strip()
    if "Outfitter" in result3:
        result3["Outfitter"] = result3["Outfitter"].split("HREFs:")[0].strip()
    if "Shoe model" in result3:
        result3["Shoe model"] = result3["Shoe model"].split("HREFs:")[0].strip()
    if "Glove" in result3:
        result3["Glove"] = result3["Glove"].split("HREFs:")[0].strip()
    if "Player's agent" in result3:
        result3["Player's agent id"] = result3["Player's agent"].split("/")[-1].strip()
        result3["Player's agent"] = result3["Player's agent"].split("HREFs:")[0].strip()
    if "Place of birth" in result3:
        result3["Country of birth"] = result3["Place of birth"].split("COUNTRY:")[-1].strip()
        result3["Place of birth"] = result3["Place of birth"].split("COUNTRY:")[0].strip()
    if "Nationality" in result3:
        result3["Nationality"] = re.sub('\s+', ' ', result3["Nationality"])
    if "on loan from" in result3:
        result3["on loan from club id"] = result3["on loan from"].split("/verein/")[-1].strip()
        result3["on loan from"] = result3["on loan from"].split("HREFs:")[0].strip()
    if "2nd club" in result3:
        result3["2nd club id"] = result3["2nd club"].split("/verein/")[-1].strip()
        result3["2nd club"] = result3["2nd club"].split("HREFs:")[0].strip()
    if "Social media" in result3:    
        socialmedia = dict([(url.split("://")[-1].split("/")[0].split(".com")[0].split(".")[-1].strip(), url) \
                            for url in result3["Social media"].split("SOCMED: ")[-1].split("####") if url.startswith("http")])    
        for platform in ['twitter', 'facebook', 'instagram']:
            if platform in socialmedia:
                result3[platform]=socialmedia[platform]
        if len([x for x in socialmedia if not x in ['twitter', 'facebook', 'instagram']]) > 0:
            result3['website']=socialmedia[[x for x in socialmedia if not x in ['twitter', 'facebook', 'instagram']][0]]
        del result3["Social media"]
    nationalTeamLinks=[a for a in bs.find(class_="dataContent").findAll("a") if '/nationalmannschaft/' in a.get("href")]
    if nationalTeamLinks:
        result3['Intl caps/goals'] = '/'.join([a.getText() for a in nationalTeamLinks])
        result3['National team id'] = nationalTeamLinks[0].get("href").split("/")[-1]
    
    return result3

def getTableByLeagueId(id, season=2015):
    try:
        bs = BeautifulSoup(urlopen(Request(getTableUrlByLeagueId(id, season), headers={'User-Agent': useragent})))
        realSeason = int(bs.find('a', class_='vereinprofil_tooltip')['href'].split('/')[-1])
        if season != realSeason:
            return DataFrame([{'league': id, 'season': season,'error': 'Season not found'}])[['league','season','error']]
        trs=[tr.find_all('td') for tr in bs.find(class_='responsive-table').find('tbody').find_all('tr')]
        df=DataFrame([{'league': id, 'season': season, 'rank': int(tr[0].getText().strip()), 'clubId': int(tr[2].find(class_='vereinprofil_tooltip')['id']),
            'clubName': tr[2].find(class_='vereinprofil_tooltip').getText(),
            'matches': int(tr[3].getText()),'wins': int(tr[4].getText()),'draws': int(tr[5].getText()),'losses': int(tr[6].getText()),
            'goalsFor': int(tr[7].getText().split(':')[0]), 'goalsAgainst': int(tr[7].getText().split(':')[-1]),
            'goalDiff': int(tr[8].getText()), 'pointsRaw': tr[9].getText()} for tr in trs])
        df['points']=(3*df['wins'])+(df['draws'])
    except Exception as e:
        return DataFrame([{'league': id, 'season': season,'error': e}])[['league','season','error']]
    return df[['league','season','rank','clubId','clubName','matches','wins','draws','losses','goalsFor','goalsAgainst','goalDiff','points','pointsRaw']]

In [4]:
%matplotlib inline
import matplotlib
matplotlib.style.use('ggplot')

In [31]:
def getClubsWithSizeByLeagueId(id, season=2015):
    bs = BeautifulSoup(urlopen(Request(getUrlByLeagueId(id, season), headers={'User-Agent': useragent})))
    elements = bs.find(id='yw1').find_all("a")
    return [{'clubId': int(e['href'].split('/')[4]), 'clubName': e['href'].split('/')[1], 'size': int(e.getText().strip(u'\xa0 '))} for e in elements if 'kader/verein' in e['href'] and not ' ' in e.getText()and not '-' in e.getText()]

In [32]:
getClubsWithSizeByLeagueId('IR1')


Out[32]:
[{'clubId': 920, 'clubName': 'derry-city', 'size': 29},
 {'clubId': 1189, 'clubName': 'st-patricks-athletic', 'size': 25},
 {'clubId': 3258, 'clubName': 'shamrock-rovers', 'size': 19},
 {'clubId': 6066, 'clubName': 'dundalk-fc', 'size': 20},
 {'clubId': 8780, 'clubName': 'sligo-rovers', 'size': 22},
 {'clubId': 9211, 'clubName': 'bohemian-football-club-dublin', 'size': 23},
 {'clubId': 27774, 'clubName': 'cork-city-fc', 'size': 26},
 {'clubId': 5568, 'clubName': 'finn-harps', 'size': 25},
 {'clubId': 4287, 'clubName': 'longford-town-fc', 'size': 25},
 {'clubId': 3930, 'clubName': 'bray-wanderers', 'size': 28},
 {'clubId': 42394, 'clubName': 'galway-united-fc', 'size': 24},
 {'clubId': 26765, 'clubName': 'wexford-youths-fc', 'size': 24}]

In [33]:
#LeagueClubSizes={}
#for league in tier1 + otherEurope:
for league in ['NIR1','IR1']:
    try:
        df = DataFrame(getClubsWithSizeByLeagueId(league))
        df['league']=league
        LeagueClubSizes[league]=df
        print league, 'done'
    except Exception as e:
        df = DataFrame([{'league': league, 'error': e}])
        LeagueClubSizes[league]=df
        print league, 'failed with error', e


NIR1 done
IR1 done

In [53]:
pd.concat(LeagueClubSizes.values()).to_csv('../data/merged/LeagueClubSizes.csv',encoding='utf-8',index=False)

In [52]:
pd.concat(LeagueClubSizes.values())['size'].hist()


Out[52]:
<matplotlib.axes._subplots.AxesSubplot at 0x109d33550>

In [3]:
def getCountryCoefficientsForSeason(season):
    url = 'http://www.uefa.com/memberassociations/uefarankings/country/season='+str(season)+'/index.html'
    bs = BeautifulSoup(urlopen(Request(url, headers={'User-Agent': useragent})))
    table = bs.find('table', id='associationrank')
    columns = ['Rank','Movement','Country']+[th.getText() for th in table.find('thead').findAll('th')][3:]
    result = DataFrame([[td.getText() for td in tr.findAll('td')] for tr in table.find('tbody').findAll('tr')])
    result.columns = columns
    result = result.drop('Movement', axis=1)
    result = result.rename(columns={'Pts': 'Pts '+ str()})
    for col in result.columns:
        if '/' in col: result[col]=result[col].apply(float)
    return result[result.columns[1:-2]].set_index('Country')

def getClubCoefficientsForSeason(season):
    url = 'http://www.uefa.com/memberassociations/uefarankings/club/season='+str(season)+'/index.html'
    bs = BeautifulSoup(urlopen(Request(url, headers={'User-Agent': useragent})))
    table = bs.find('table', id='clubrank')
    columns= ['Club','Country']+[th.getText() for th in table.find('thead').findAll('th')][2:]
    result = DataFrame([[';'.join([span.getText() for span in tr.find('td').findAll('span')])]+[td.getText() for td in tr.findAll('td')[1:]] for tr in table.find('tbody').findAll('tr')])
    result.columns = columns
    result['Rank']=result['Club'].str.split(';',expand=True)[0]
    result= result[['Rank']+columns]
    result['Club']=result['Club'].str.split(';',expand=True)[2]
    result = result.rename(columns={'Pts': 'Pts '+ str()})
    for col in result.columns:
        if '/' in col: result[col]=result[col].apply(float)
    return result[result.columns[1:-1]].set_index(['Club','Country'])

In [ ]:
url = 'http://www.uefa.com/memberassociations/uefarankings/country/index.html'
bs = BeautifulSoup(urlopen(Request(url, headers={'User-Agent': useragent})))
DataFrame([{'countryCode': img['src'].split('/')[-1].split('.')[0].upper(),'countryName': img['title']} for (img,td) in ((td.find('img'),td) for td in bs.findAll('td',class_='l countrycode'))])

In [168]:
#getCountryCoefficientsForSeason(2010)
clubs2005=getClubCoefficientsForSeason(2005)
clubs2010=getClubCoefficientsForSeason(2010)
clubs2015=getClubCoefficientsForSeason(2015)
clubs2016=getClubCoefficientsForSeason(2016)[['15/16']]
clubCoefficients=clubs2005.join(clubs2010,how='outer').join(clubs2015,how='outer').join(clubs2016,how='outer').fillna(0)

In [124]:
countries2003=getCountryCoefficientsForSeason(2003)
countries2008=getCountryCoefficientsForSeason(2008)
countries2013=getCountryCoefficientsForSeason(2013)
countries2016=getCountryCoefficientsForSeason(2016)[['13/14','14/15','15/16']]
countryCoefficients=countries2003.join(countries2008,how='outer').join(countries2013,how='outer').join(countries2016,how='outer').fillna(0)

In [126]:
countryCoefficients.to_csv('../data/reference/country_coefficient2.csv')

In [211]:
def gini(list_of_values):
    sorted_list = sorted([x for x in list_of_values if x>0])[:200]
    height, area = 0, 0
    for value in sorted_list:
        height += value
        area += height - value / 2.
    fair_area = height * len(list_of_values) / 2.
    return (fair_area - area) / fair_area

In [ ]:
#clubCoefficients.apply(lambda list_of_values: len([x for x in list_of_values if x>0])).plot.line()
clubCoefficients.apply(gini).plot.line()

In [125]:
cl=getClubCoefficientsForSeason(2010)
co=getCountryCoefficientsForSeason(2010)

In [123]:
def getClubCoefficientsPage(page):
    url = 'http://www.transfermarkt.com/statistik/klubrangliste?page='+str(page)
    bs = BeautifulSoup(urlopen(Request(url, headers={'User-Agent': useragent})))
    table = bs.find('div', id='yw1').find('table')
    data=[[(td.getText().strip(),td.find('a').get('href') if td.find('a') else None) for td in tr.findAll('td')] for tr in table.find('tbody').findAll('tr') if tr.get("class")]    
    result= DataFrame([{'rank':int(row[0][0]), 'clubId':int(row[1][1].split('/')[-1]), 'clubName': row[1][0],
                       'coeff12': row[5][0], 'coeff13': row[6][0], 'coeff14': row[7][0], 'coeff15': row[8][0], 'points':row[10][0]} for row in data])
    for season in range(12,16):
        result['coeff'+str(season)]=result['coeff'+str(season)].apply(lambda s: 0 if s == '-' else int(s.replace('.','')))
    result['points']=result['points'].apply(lambda s: int(s.replace('.','')))
    return result

result=pd.concat(getClubCoefficientsPage(i) for i in range(1,19)).reset_index(drop=True)
result.to_csv('../data/reference/club_coefficients.csv',encoding='utf-8')

In [106]:
result=result

In [122]:


In [55]:
[u'#', u'Club(s)', u'Country', u'12/13', u'13/14', u'14/15', u'15/16', u'16/17', u'Points'].remove('Country')

In [53]:
[[(td.getText().strip(),td.find('a').get('href') if td.find('a') else None) for td in tr.findAll('td')] for tr in t.findAll('tr') if tr.get("class")]


Out[53]:
[[(u'426', None),
  (u'Newtown', '/newtown-afc/startseite/verein/31130'),
  (u'', '/newtown-afc/startseite/verein/31130'),
  (u'Newtown', '/newtown/startseite/verein/31130'),
  (u'', None),
  (u'100', '/jumplist/pokalhistorie/verein/31130'),
  (u'150', '/jumplist/pokalhistorie/verein/31130'),
  (u'25', '/jumplist/pokalhistorie/verein/31130'),
  (u'800', '/jumplist/pokalhistorie/verein/31130'),
  (u'', '/jumplist/pokalhistorie/verein/31130'),
  (u'1.075', '/jumplist/pokalhistorie/verein/31130')],
 [(u'427', None),
  (u'Tre Penne', '/sp-tre-penne/startseite/verein/10747'),
  (u'', '/sp-tre-penne/startseite/verein/10747'),
  (u'Tre Penne', '/tre-penne/startseite/verein/10747'),
  (u'', None),
  (u'500', '/jumplist/pokalhistorie/verein/10747'),
  (u'566', '/jumplist/pokalhistorie/verein/10747'),
  (u'-', '/jumplist/pokalhistorie/verein/10747'),
  (u'-', '/jumplist/pokalhistorie/verein/10747'),
  (u'', '/jumplist/pokalhistorie/verein/10747'),
  (u'1.066', '/jumplist/pokalhistorie/verein/10747')],
 [(u'428', None),
  (u'FC Gagra', '/fc-gagra/startseite/verein/22842'),
  (u'', '/fc-gagra/startseite/verein/22842'),
  (u'FC Gagra', '/fc-gagra/startseite/verein/22842'),
  (u'', None),
  (u'300', '/jumplist/pokalhistorie/verein/22842'),
  (u'375', '/jumplist/pokalhistorie/verein/22842'),
  (u'250', '/jumplist/pokalhistorie/verein/22842'),
  (u'125', '/jumplist/pokalhistorie/verein/22842'),
  (u'', '/jumplist/pokalhistorie/verein/22842'),
  (u'1.050', '/jumplist/pokalhistorie/verein/22842')],
 [(u'429', None),
  (u'Sliema Wander.', '/sliema-wanderers/startseite/verein/329'),
  (u'', '/sliema-wanderers/startseite/verein/329'),
  (u'Sliema Wander.', '/sliema-wander-/startseite/verein/329'),
  (u'', None),
  (u'175', '/jumplist/pokalhistorie/verein/329'),
  (u'425', '/jumplist/pokalhistorie/verein/329'),
  (u'275', '/jumplist/pokalhistorie/verein/329'),
  (u'175', '/jumplist/pokalhistorie/verein/329'),
  (u'', '/jumplist/pokalhistorie/verein/329'),
  (u'1.050', '/jumplist/pokalhistorie/verein/329')],
 [(u'430', None),
  (u'Drogheda United', '/drogheda-united-fc/startseite/verein/4277'),
  (u'', '/drogheda-united-fc/startseite/verein/4277'),
  (u'Drogheda United', '/drogheda-united/startseite/verein/4277'),
  (u'', None),
  (u'200', '/jumplist/pokalhistorie/verein/4277'),
  (u'300', '/jumplist/pokalhistorie/verein/4277'),
  (u'400', '/jumplist/pokalhistorie/verein/4277'),
  (u'140', '/jumplist/pokalhistorie/verein/4277'),
  (u'', '/jumplist/pokalhistorie/verein/4277'),
  (u'1.040', '/jumplist/pokalhistorie/verein/4277')],
 [(u'431', None),
  (u'Bohemians', '/bohemian-football-club-dublin/startseite/verein/9211'),
  (u'', '/bohemian-football-club-dublin/startseite/verein/9211'),
  (u'Bohemians', '/bohemians/startseite/verein/9211'),
  (u'', None),
  (u'450', '/jumplist/pokalhistorie/verein/9211'),
  (u'50', '/jumplist/pokalhistorie/verein/9211'),
  (u'400', '/jumplist/pokalhistorie/verein/9211'),
  (u'140', '/jumplist/pokalhistorie/verein/9211'),
  (u'', '/jumplist/pokalhistorie/verein/9211'),
  (u'1.040', '/jumplist/pokalhistorie/verein/9211')],
 [(u'432', None),
  (u'Cork City', '/cork-city-fc/startseite/verein/27774'),
  (u'', '/cork-city-fc/startseite/verein/27774'),
  (u'Cork City', '/cork-city/startseite/verein/27774'),
  (u'', None),
  (u'200', '/jumplist/pokalhistorie/verein/27774'),
  (u'50', '/jumplist/pokalhistorie/verein/27774'),
  (u'400', '/jumplist/pokalhistorie/verein/27774'),
  (u'390', '/jumplist/pokalhistorie/verein/27774'),
  (u'', '/jumplist/pokalhistorie/verein/27774'),
  (u'1.040', '/jumplist/pokalhistorie/verein/27774')],
 [(u'433', None),
  (u'Santos Tartu', '/fc-santos-tartu/startseite/verein/34711'),
  (u'', '/fc-santos-tartu/startseite/verein/34711'),
  (u'Santos Tartu', '/santos-tartu/startseite/verein/34711'),
  (u'', None),
  (u'75', '/jumplist/pokalhistorie/verein/34711'),
  (u'200', '/jumplist/pokalhistorie/verein/34711'),
  (u'550', '/jumplist/pokalhistorie/verein/34711'),
  (u'200', '/jumplist/pokalhistorie/verein/34711'),
  (u'', '/jumplist/pokalhistorie/verein/34711'),
  (u'1.025', '/jumplist/pokalhistorie/verein/34711')],
 [(u'434', None),
  (u'Siauliai', '/fk-siauliai/startseite/verein/5970'),
  (u'', '/fk-siauliai/startseite/verein/5970'),
  (u'Siauliai', '/siauliai/startseite/verein/5970'),
  (u'', None),
  (u'475', '/jumplist/pokalhistorie/verein/5970'),
  (u'250', '/jumplist/pokalhistorie/verein/5970'),
  (u'100', '/jumplist/pokalhistorie/verein/5970'),
  (u'150', '/jumplist/pokalhistorie/verein/5970'),
  (u'', '/jumplist/pokalhistorie/verein/5970'),
  (u'975', '/jumplist/pokalhistorie/verein/5970')],
 [(u'435', None),
  (u'Banga Gargzdai', '/banga-gargzdai/startseite/verein/23559'),
  (u'', '/banga-gargzdai/startseite/verein/23559'),
  (u'Banga Gargzdai', '/banga-gargzdai/startseite/verein/23559'),
  (u'', None),
  (u'225', '/jumplist/pokalhistorie/verein/23559'),
  (u'250', '/jumplist/pokalhistorie/verein/23559'),
  (u'350', '/jumplist/pokalhistorie/verein/23559'),
  (u'150', '/jumplist/pokalhistorie/verein/23559'),
  (u'', '/jumplist/pokalhistorie/verein/23559'),
  (u'975', '/jumplist/pokalhistorie/verein/23559')],
 [(u'436', None),
  (u'Mogren Budva', '/mogren-budva/startseite/verein/748'),
  (u'', '/mogren-budva/startseite/verein/748'),
  (u'Mogren Budva', '/mogren-budva/startseite/verein/748'),
  (u'', None),
  (u'275', '/jumplist/pokalhistorie/verein/748'),
  (u'250', '/jumplist/pokalhistorie/verein/748'),
  (u'150', '/jumplist/pokalhistorie/verein/748'),
  (u'200', '/jumplist/pokalhistorie/verein/748'),
  (u'', '/jumplist/pokalhistorie/verein/748'),
  (u'875', '/jumplist/pokalhistorie/verein/748')],
 [(u'437', None),
  (u'UN K\xe4erj\xe9ng 97', '/un-kaerjeng-97/startseite/verein/12004'),
  (u'', '/un-kaerjeng-97/startseite/verein/12004'),
  (u'UN K\xe4erj\xe9ng 97', '/un-kaerjeng-97/startseite/verein/12004'),
  (u'', None),
  (u'275', '/jumplist/pokalhistorie/verein/12004'),
  (u'300', '/jumplist/pokalhistorie/verein/12004'),
  (u'100', '/jumplist/pokalhistorie/verein/12004'),
  (u'150', '/jumplist/pokalhistorie/verein/12004'),
  (u'', '/jumplist/pokalhistorie/verein/12004'),
  (u'825', '/jumplist/pokalhistorie/verein/12004')],
 [(u'438', None),
  (u'Llanelli', '/llanelli-afc/startseite/verein/11915'),
  (u'', '/llanelli-afc/startseite/verein/11915'),
  (u'Llanelli', '/llanelli/startseite/verein/11915'),
  (u'', None),
  (u'350', '/jumplist/pokalhistorie/verein/11915'),
  (u'150', '/jumplist/pokalhistorie/verein/11915'),
  (u'25', '/jumplist/pokalhistorie/verein/11915'),
  (u'300', '/jumplist/pokalhistorie/verein/11915'),
  (u'', '/jumplist/pokalhistorie/verein/11915'),
  (u'825', '/jumplist/pokalhistorie/verein/11915')],
 [(u'439', None),
  (u'Aberystwyth', '/aberystwyth-town-fc/startseite/verein/21541'),
  (u'', '/aberystwyth-town-fc/startseite/verein/21541'),
  (u'Aberystwyth', '/aberystwyth/startseite/verein/21541'),
  (u'', None),
  (u'100', '/jumplist/pokalhistorie/verein/21541'),
  (u'150', '/jumplist/pokalhistorie/verein/21541'),
  (u'275', '/jumplist/pokalhistorie/verein/21541'),
  (u'300', '/jumplist/pokalhistorie/verein/21541'),
  (u'', '/jumplist/pokalhistorie/verein/21541'),
  (u'825', '/jumplist/pokalhistorie/verein/21541')],
 [(u'440', None),
  (u'Cefn Druids', '/cefn-druids/startseite/verein/22502'),
  (u'', '/cefn-druids/startseite/verein/22502'),
  (u'Cefn Druids', '/cefn-druids/startseite/verein/22502'),
  (u'', None),
  (u'350', '/jumplist/pokalhistorie/verein/22502'),
  (u'150', '/jumplist/pokalhistorie/verein/22502'),
  (u'25', '/jumplist/pokalhistorie/verein/22502'),
  (u'300', '/jumplist/pokalhistorie/verein/22502'),
  (u'', '/jumplist/pokalhistorie/verein/22502'),
  (u'825', '/jumplist/pokalhistorie/verein/22502')],
 [(u'441', None),
  (u'AC Libertas', '/ac-libertas/startseite/verein/10616'),
  (u'', '/ac-libertas/startseite/verein/10616'),
  (u'AC Libertas', '/ac-libertas/startseite/verein/10616'),
  (u'', None),
  (u'250', '/jumplist/pokalhistorie/verein/10616'),
  (u'316', '/jumplist/pokalhistorie/verein/10616'),
  (u'250', '/jumplist/pokalhistorie/verein/10616'),
  (u'-', '/jumplist/pokalhistorie/verein/10616'),
  (u'', '/jumplist/pokalhistorie/verein/10616'),
  (u'816', '/jumplist/pokalhistorie/verein/10616')],
 [(u'442', None),
  (u'SS Folgore/Falciano', '/ss-folgore-falciano/startseite/verein/11479'),
  (u'', '/ss-folgore-falciano/startseite/verein/11479'),
  (u'SS Folgore/Falciano', '/ss-folgore-falciano/startseite/verein/11479'),
  (u'', None),
  (u'-', '/jumplist/pokalhistorie/verein/11479'),
  (u'66', '/jumplist/pokalhistorie/verein/11479'),
  (u'250', '/jumplist/pokalhistorie/verein/11479'),
  (u'500', '/jumplist/pokalhistorie/verein/11479'),
  (u'', '/jumplist/pokalhistorie/verein/11479'),
  (u'816', '/jumplist/pokalhistorie/verein/11479')],
 [(u'443', None),
  (u'Floriana', '/floriana-fc/startseite/verein/10603'),
  (u'', '/floriana-fc/startseite/verein/10603'),
  (u'Floriana', '/floriana/startseite/verein/10603'),
  (u'', None),
  (u'425', '/jumplist/pokalhistorie/verein/10603'),
  (u'175', '/jumplist/pokalhistorie/verein/10603'),
  (u'25', '/jumplist/pokalhistorie/verein/10603'),
  (u'175', '/jumplist/pokalhistorie/verein/10603'),
  (u'', '/jumplist/pokalhistorie/verein/10603'),
  (u'800', '/jumplist/pokalhistorie/verein/10603')],
 [(u'444', None),
  (u'Balzan Youths', '/fc-balzan-youths/startseite/verein/32129'),
  (u'', '/fc-balzan-youths/startseite/verein/32129'),
  (u'Balzan Youths', '/balzan-youths/startseite/verein/32129'),
  (u'', None),
  (u'175', '/jumplist/pokalhistorie/verein/32129'),
  (u'175', '/jumplist/pokalhistorie/verein/32129'),
  (u'25', '/jumplist/pokalhistorie/verein/32129'),
  (u'425', '/jumplist/pokalhistorie/verein/32129'),
  (u'', '/jumplist/pokalhistorie/verein/32129'),
  (u'800', '/jumplist/pokalhistorie/verein/32129')],
 [(u'445', None),
  (u'Tauras', '/tauras-taurage/startseite/verein/23562'),
  (u'', '/tauras-taurage/startseite/verein/23562'),
  (u'Tauras', '/tauras/startseite/verein/23562'),
  (u'', None),
  (u'225', '/jumplist/pokalhistorie/verein/23562'),
  (u'250', '/jumplist/pokalhistorie/verein/23562'),
  (u'100', '/jumplist/pokalhistorie/verein/23562'),
  (u'150', '/jumplist/pokalhistorie/verein/23562'),
  (u'', '/jumplist/pokalhistorie/verein/23562'),
  (u'725', '/jumplist/pokalhistorie/verein/23562')],
 [(u'446', None),
  (u'Europa FC', '/europa-fc/startseite/verein/42246'),
  (u'', '/europa-fc/startseite/verein/42246'),
  (u'Europa FC', '/europa-fc/startseite/verein/42246'),
  (u'', None),
  (u'-', '/jumplist/pokalhistorie/verein/42246'),
  (u'-', '/jumplist/pokalhistorie/verein/42246'),
  (u'300', '/jumplist/pokalhistorie/verein/42246'),
  (u'400', '/jumplist/pokalhistorie/verein/42246'),
  (u'', '/jumplist/pokalhistorie/verein/42246'),
  (u'700', '/jumplist/pokalhistorie/verein/42246')],
 [(u'447', None),
  (u'Sant Julia', '/ue-sant-julia/startseite/verein/11910'),
  (u'', '/ue-sant-julia/startseite/verein/11910'),
  (u'Sant Julia', '/sant-julia/startseite/verein/11910'),
  (u'', None),
  (u'-', '/jumplist/pokalhistorie/verein/11910'),
  (u'66', '/jumplist/pokalhistorie/verein/11910'),
  (u'350', '/jumplist/pokalhistorie/verein/11910'),
  (u'283', '/jumplist/pokalhistorie/verein/11910'),
  (u'', '/jumplist/pokalhistorie/verein/11910'),
  (u'699', '/jumplist/pokalhistorie/verein/11910')],
 [(u'448', None),
  (u'AC Juvenes-D.', '/ac-juvenes-dogana/startseite/verein/10607'),
  (u'', '/ac-juvenes-dogana/startseite/verein/10607'),
  (u'AC Juvenes-D.', '/ac-juvenes-d-/startseite/verein/10607'),
  (u'', None),
  (u'-', '/jumplist/pokalhistorie/verein/10607'),
  (u'66', '/jumplist/pokalhistorie/verein/10607'),
  (u'-', '/jumplist/pokalhistorie/verein/10607'),
  (u'250', '/jumplist/pokalhistorie/verein/10607'),
  (u'', '/jumplist/pokalhistorie/verein/10607'),
  (u'316', '/jumplist/pokalhistorie/verein/10607')],
 [(u'449', None),
  (u'Tre Fiori', '/sp-tre-fiori/startseite/verein/10617'),
  (u'', '/sp-tre-fiori/startseite/verein/10617'),
  (u'Tre Fiori', '/tre-fiori/startseite/verein/10617'),
  (u'', None),
  (u'-', '/jumplist/pokalhistorie/verein/10617'),
  (u'66', '/jumplist/pokalhistorie/verein/10617'),
  (u'-', '/jumplist/pokalhistorie/verein/10617'),
  (u'-', '/jumplist/pokalhistorie/verein/10617'),
  (u'', '/jumplist/pokalhistorie/verein/10617'),
  (u'66', '/jumplist/pokalhistorie/verein/10617')]]

In [138]:
co.set_index('Country').ix[:20][co.columns[2:-2]].T.plot.line(figsize=(20,10))


Out[138]:
<matplotlib.axes._subplots.AxesSubplot at 0x10ea0ffd0>

In [109]:
ts1['calculatedPoints']=(ts1['wins']*3)+ts1['draws']
ts1['calculatedMatches']=ts1['wins']+ts1['draws']+ts1['losses']
ts1['calculatedGoalDiff']=ts1['goalsFor']-ts1['goalsAgainst']

ts1[['points','calculatedPoints','wins','draws','matches','calculatedMatches','goalDiff','calculatedGoalDiff']]


Out[109]:
points calculatedPoints wins draws matches calculatedMatches goalDiff calculatedGoalDiff
0 79 79 25 4 30 30 59 59
1 66 66 19 9 30 30 43 43
2 50 50 14 8 30 30 16 16
3 48 48 14 6 30 30 -9 -9
4 46 46 13 7 30 30 16 16
5 40 40 11 7 30 30 3 3
6 38 38 10 8 30 30 -2 -2
7 37 37 10 7 30 30 -5 -5
8 37 37 10 7 30 30 -10 -10
9 35 35 8 11 30 30 -10 -10
10 34 34 9 7 30 30 -10 -10
11 34 34 9 7 30 30 -15 -15
12 30 30 7 9 30 30 -14 -14
13 30 30 8 6 30 30 -27 -27
14 29 29 7 8 30 30 -18 -18
15 27 27 6 9 30 30 -17 -17

In [227]:
def getPlayersByLeagueSeasonFile(league,season):
    filename = league + "_" + str(season)
    df = pd.read_excel("../data/rawindex/" + filename + ".xls",index_col=2,names=['','name'])
    df["filename"]=filename
    df["league"]=league
    df["season"]=season
    return df

players = pd.concat([getPlayersByLeagueSeasonFile(league,season) \
     for season in range(2001,2016) \
     for league in ['GB1','ES1','IT1','L1','FR1','TR1','RU1','PO1','NL1','BE1','GB2','MLS1']])[["name","filename","league"]] \
    .drop_duplicates(subset=['name','league']).sort_index()

#[DataFrame(players[players["filename"]==x])["name"] for x in players["filename"].drop_duplicates()]


Out[227]:
name filename league
7767 Fernando Torres ES1_2002 ES1
7767 Fernando Torres IT1_2014 IT1
7767 Fernando Torres GB1_2007 GB1

In [302]:
def getPersonalFile(league,season):
    filename = league + "_" + str(season)
    result = pd.read_csv("../data/personal/" + filename + ".csv")
    result['filename'] = filename
    return result

df = pd.concat([getPersonalFile(league, season) for season in range(2001,2016) for league in ['GB1']])
df[pd.notnull(df['error'])][['id','error','filename']]


Out[302]:
id error filename
100 108471 'NoneType' object has no attribute 'find_all' GB1_2011
81 73786 'NoneType' object has no attribute 'find_all' GB1_2013

In [316]:
#getPlayerDataFromBs(getBsByPlayerId(73786))

#bs = getBsByPlayerId(45559)  TRANSIENT?
#bs = getBsByPlayerId(87658)  TRANSIENT?
#bs = getBsByPlayerId(108471)  ?????
#bs = getBsByPlayerId(145466 ) FIXED
#bs = getBsByPlayerId(73786) ?????
#bs = getBsByPlayerId(206225) FIXED

bs = getBsByPlayerId(196628) 

getPlayerDataFromBs(bs)


now processing Alessandro Stefanini
Out[316]:
{u'Age': u'28',
 u'Contract until': u'-',
 'Country of birth': u'Italy',
 u'Current club': u'Rosignano Sei Rose',
 'Current club id': u'27543',
 u'Date of birth': u'Jun 23, 1987',
 'Display name': u'Alessandro Stefanini',
 u'In the team since': u'Jul 1, 2011',
 u'Place of birth': u'Firenze',
 u'Position': u'Midfield',
 'birthDate': u'Jun 23, 1987 (28)'}

In [303]:
def getTransfersFile(league,season):
    filename = league + "_" + str(season)
    result = pd.read_csv("../data/transfers/" + filename + ".csv")
    result['filename'] = filename
    return result

df = pd.concat([getTransfersFile(league, season) for season in range(2001,2016) for league in ['GB1']])
df[pd.notnull(df['error'])][['id','error','filename']]


Out[303]:
id error filename
5787 145527 'NoneType' object has no attribute 'find_all' GB1_2001
2728 63533 'NoneType' object has no attribute 'find_all' GB1_2002
2729 128831 'NoneType' object has no attribute 'find_all' GB1_2002
2163 14077 'NoneType' object has no attribute 'find_all' GB1_2004
627 45559 'NoneType' object has no attribute 'find' GB1_2008
226 87658 'NoneType' object has no attribute 'find' GB1_2009
1530 108471 'NoneType' object has no attribute 'find_all' GB1_2011
1571 325105 'NoneType' object has no attribute 'find_all' GB1_2013
1572 73786 'NoneType' object has no attribute 'find_all' GB1_2013
1275 296986 'NoneType' object has no attribute 'find_all' GB1_2014
1276 314237 'NoneType' object has no attribute 'find_all' GB1_2014

In [306]:
bs = getBsByPlayerId(108471)  

getTransfersFromBs(bs)


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-306-0e95dc7c5cff> in <module>()
      3 bs = getBsByPlayerId(108471)
      4 
----> 5 getTransfersFromBs(bs)

<ipython-input-291-87a75e5e8211> in getTransfersFromBs(bs)
     42 
     43 def getTransfersFromBs(bs):
---> 44     elements = bs.find(class_="transferhistorie").find_all("tr",class_="zeile-transfer")
     45     dicts = [{'seasonDate': "  ".join([td.getText() for td in e.findAll("td")[:2]]),
     46       'mv': e.find("td",class_="zelle-mw").getText(),

AttributeError: 'NoneType' object has no attribute 'find_all'

In [299]:
df.drop('error',axis=1)


Out[299]:
date fee filename fromCountry fromTeamId fromTeamName id mv playerName season toCountry toTeamId toTeamName
0 Jul 1, 2014 - GB1_2001 Australia 970 Perth Glory 4096 300 Th. € Jacob Burns 14/15 NaN 123 End of career
1 Jul 1, 2009 Free transfer GB1_2001 Romania 10442 Unirea Urziceni 4096 400 Th. € Jacob Burns 09/10 Australia 970 Perth Glory
2 Feb 1, 2008 Free transfer GB1_2001 Poland 422 Wisla Kraków 4096 450 Th. € Jacob Burns 07/08 Romania 10442 Unirea Urziceni
3 Feb 1, 2006 100 Th. € GB1_2001 England 349 Barnsley FC 4096 400 Th. € Jacob Burns 05/06 Poland 422 Wisla Kraków
4 Jan 1, 2004 - GB1_2001 England 515 Unattached 4096 - Jacob Burns 03/04 NaN 349 Barnsley FC
5 Jul 1, 2003 - GB1_2001 England 399 Leeds 4096 - Jacob Burns 03/04 NaN 515 Unattached
6 Jul 1, 2000 375 Th. € GB1_2001 Australia 2500 Parra. Power 4096 - Jacob Burns 00/01 England 399 Leeds
7 Jul 1, 1999 ? GB1_2001 Australia 6045 Sydney United 4096 - Jacob Burns 99/00 Australia 2500 Parra. Power
8 Jan 11, 2016 - GB1_2001 England 2804 Barnet 4097 150 Th. € Kevin Lisbie 15/16 NaN 515 Unattached
9 Jul 1, 2015 Free transfer GB1_2001 England 1150 Leyton Orient 4097 150 Th. € Kevin Lisbie 15/16 England 2804 Barnet
10 Apr 15, 2015 End of loan GB1_2001 England 3684 Stevenage FC 4097 150 Th. € Kevin Lisbie 14/15 England 1150 Leyton Orient
11 Mar 17, 2015 Loan GB1_2001 England 1150 Leyton Orient 4097 150 Th. € Kevin Lisbie 14/15 England 3684 Stevenage FC
12 Sep 1, 2011 - GB1_2001 England 677 Ipswich 4097 750 Th. € Kevin Lisbie 11/12 England 1150 Leyton Orient
13 Jun 30, 2011 End of loan GB1_2001 England 1028 Millwall 4097 750 Th. € Kevin Lisbie 10/11 England 677 Ipswich
14 Aug 1, 2010 Loan GB1_2001 England 677 Ipswich 4097 1,25 Mill. € Kevin Lisbie 10/11 England 1028 Millwall
15 Jun 30, 2010 End of loan GB1_2001 England 1060 Colchester Utd. 4097 1,25 Mill. € Kevin Lisbie 09/10 England 677 Ipswich
16 Aug 1, 2009 Loan GB1_2001 England 677 Ipswich 4097 1,25 Mill. € Kevin Lisbie 09/10 England 1060 Colchester Utd.
17 Jul 1, 2008 750 Th. € GB1_2001 England 1060 Colchester Utd. 4097 1,25 Mill. € Kevin Lisbie 08/09 England 677 Ipswich
18 Jul 1, 2007 Free transfer GB1_2001 England 358 Charlton 4097 2,00 Mill. € Kevin Lisbie 07/08 England 1060 Colchester Utd.
19 Jun 30, 2006 End of loan GB1_2001 England 22 Derby 4097 2,50 Mill. € Kevin Lisbie 05/06 England 358 Charlton
20 Feb 1, 2006 Loan GB1_2001 England 358 Charlton 4097 2,50 Mill. € Kevin Lisbie 05/06 England 22 Derby
21 Oct 1, 2005 End of loan GB1_2001 England 1123 Norwich 4097 2,50 Mill. € Kevin Lisbie 05/06 England 358 Charlton
22 Sep 1, 2005 Loan GB1_2001 England 358 Charlton 4097 2,50 Mill. € Kevin Lisbie 05/06 England 1123 Norwich
23 Dec 31, 2000 End of loan GB1_2001 England 1039 QPR 4097 - Kevin Lisbie 00/01 England 358 Charlton
24 Dec 1, 2000 Loan GB1_2001 England 358 Charlton 4097 - Kevin Lisbie 00/01 England 1039 QPR
25 Dec 31, 1999 End of loan GB1_2001 England 1032 Reading 4097 - Kevin Lisbie 99/00 England 358 Charlton
26 Nov 1, 1999 Loan GB1_2001 England 358 Charlton 4097 - Kevin Lisbie 99/00 England 1032 Reading
27 Apr 1, 1999 End of loan GB1_2001 England 2814 Gillingham FC 4097 - Kevin Lisbie 98/99 England 358 Charlton
28 Mar 1, 1999 Loan GB1_2001 England 358 Charlton 4097 - Kevin Lisbie 98/99 England 2814 Gillingham FC
29 Jul 1, 1995 - GB1_2001 England 6932 Charlton U18 4097 - Kevin Lisbie 95/96 England 358 Charlton
... ... ... ... ... ... ... ... ... ... ... ... ... ...
761 Jul 1, 2011 3,50 Mill. € GB1_2015 Italy 252 Genoa 16810 3,50 Mill. € Marco Amelia 11/12 Italy 5 AC Milan
762 Jun 30, 2011 End of loan GB1_2015 Italy 5 AC Milan 16810 3,50 Mill. € Marco Amelia 10/11 Italy 252 Genoa
763 Jul 1, 2010 Loan GB1_2015 Italy 252 Genoa 16810 9,00 Mill. € Marco Amelia 10/11 Italy 5 AC Milan
764 Aug 1, 2009 5,00 Mill. € GB1_2015 Italy 458 US Palermo 16810 10,00 Mill. € Marco Amelia 09/10 Italy 252 Genoa
765 Jul 1, 2008 6,00 Mill. € GB1_2015 Italy 1210 AS Livorno 16810 12,00 Mill. € Marco Amelia 08/09 Italy 458 US Palermo
766 Jun 30, 2004 End of loan GB1_2015 Italy 130 AC Parma 16810 - Marco Amelia 03/04 Italy 1210 AS Livorno
767 Jan 1, 2004 Loan GB1_2015 Italy 1210 AS Livorno 16810 - Marco Amelia 03/04 Italy 130 AC Parma
768 Dec 1, 2003 End of loan GB1_2015 Italy 1005 Lecce 16810 - Marco Amelia 03/04 Italy 1210 AS Livorno
769 Jul 1, 2003 Loan GB1_2015 Italy 1210 AS Livorno 16810 - Marco Amelia 03/04 Italy 1005 Lecce
770 Jul 1, 2001 gratuito GB1_2015 Italy 12 AS Roma 16810 - Marco Amelia 01/02 Italy 1210 AS Livorno
771 Jul 1, 2000 - GB1_2015 Italy 11997 Roma Primavera 16810 - Marco Amelia 00/01 Italy 12 AS Roma
772 Jul 22, 2015 Free transfer GB1_2015 Italy 6195 SSC Napoli 76799 3,50 Mill. € Miguel Britos 15/16 England 1010 Watford
773 Jul 12, 2011 9,00 Mill. € GB1_2015 Italy 1025 Bologna 76799 3,50 Mill. € Miguel Britos 11/12 Italy 6195 SSC Napoli
774 Jul 12, 2008 4,00 Mill. € GB1_2015 Uruguay 2403 Wanderers 76799 - Miguel Britos 08/09 Italy 1025 Bologna
775 Jul 1, 2007 ? GB1_2015 Uruguay 17428 Juventud 76799 - Miguel Britos 07/08 Uruguay 2403 Wanderers
776 Jan 1, 2006 ? GB1_2015 Uruguay 3829 CA Fénix 76799 - Miguel Britos 05/06 Uruguay 17428 Juventud
777 Jul 1, 2005 - GB1_2015 Uruguay 27779 CA Fénix U19 76799 - Miguel Britos 05/06 Uruguay 3829 CA Fénix
778 Jul 10, 2015 11,00 Mill. € GB1_2015 Italy 506 Juventus 48002 9,00 Mill. € Angelo Ogbonna 15/16 England 379 West Ham
779 Jul 11, 2013 15,00 Mill. € GB1_2015 Italy 416 Torino 48002 12,00 Mill. € Angelo Ogbonna 13/14 Italy 506 Juventus
780 Jun 30, 2008 End of loan GB1_2015 Italy 4083 Crotone 48002 200 Th. € Angelo Ogbonna 07/08 Italy 416 Torino
781 Jul 1, 2007 Loan GB1_2015 Italy 416 Torino 48002 200 Th. € Angelo Ogbonna 07/08 Italy 4083 Crotone
782 Feb 1, 2007 - GB1_2015 Italy 12130 FC Turin 48002 - Angelo Ogbonna 06/07 Italy 416 Torino
783 Jul 27, 2015 6,50 Mill. € GB1_2015 Netherlands 1090 AZ Alkmaar 129554 3,50 Mill. € Steven Berghuis 15/16 England 1010 Watford
784 Jul 1, 2012 500 Th. € GB1_2015 Netherlands 317 FC Twente 129554 600 Th. € Steven Berghuis 12/13 Netherlands 1090 AZ Alkmaar
785 Jun 30, 2012 End of loan GB1_2015 Netherlands 1426 VVV-Venlo 129554 600 Th. € Steven Berghuis 11/12 Netherlands 317 FC Twente
786 Jan 1, 2012 Loan GB1_2015 Netherlands 317 FC Twente 129554 600 Th. € Steven Berghuis 11/12 Netherlands 1426 VVV-Venlo
787 Jul 1, 2011 - GB1_2015 Netherlands 18026 FC Twente U21 129554 150 Th. € Steven Berghuis 11/12 Netherlands 317 FC Twente
788 Jul 1, 2010 - GB1_2015 Netherlands 6200 FC Twente U19 129554 - Steven Berghuis 10/11 Netherlands 18026 FC Twente U21
789 Jul 1, 2009 Free transfer GB1_2015 Netherlands 15589 Go Ahead U19 129554 - Steven Berghuis 09/10 Netherlands 6200 FC Twente U19
790 Jul 1, 2008 Free transfer GB1_2015 Netherlands 33234 WSV Apeldoorn 129554 - Steven Berghuis 08/09 Netherlands 15589 Go Ahead U19

31040 rows × 13 columns


In [276]:
url = "https:// twitter.com/timream5"
url.split("//")[-1].split("/")[0].split(".")[-2]


Out[276]:
' twitter'

In [146]:
bs = getBsByPlayerId(69651)

In [151]:
#getPlayerDataFromBs(bs)
DataFrame(getTransfersFromBs(bs))


Out[151]:
date fee fromCountry fromTeamId fromTeamName mv playerName season toCountry toTeamId toTeamName
0 May 16, 2016 End of loan Scotland 43 Heart of Midl. 700 Th. € Abiola Dauda 15/16 Netherlands 499 Vitesse
1 Feb 1, 2016 Loan Netherlands 499 Vitesse 700 Th. € Abiola Dauda 15/16 Scotland 43 Heart of Midl.
2 Jul 24, 2014 500 Th. € Serbia 159 Red Star 1,25 Mill. € Abiola Dauda 14/15 Netherlands 499 Vitesse
3 Feb 8, 2013 Free transfer Sweden 3654 Kalmar FF 1,00 Mill. € Abiola Dauda 12/13 Serbia 159 Red Star
4 Jan 1, 2008 ? Sweden 8905 Sölvesborgs GoIF - Abiola Dauda 07/08 Sweden 3654 Kalmar FF

In [97]:
bs = BeautifulSoup(urlopen(Request(getUrlByPlayerId(7767), headers={'User-Agent': useragent})))

In [131]:
nationalTeamLinks=[a for a in bs.find(class_="dataContent").findAll("a") if '/nationalmannschaft/' in a.get("href")]

if nationalTeamLinks:
    print '/'.join([a.getText() for a in nationalTeamLinks])
    print nationalTeamLinks[0].get("href").split("/")[-1]


110/38
3375

In [175]:
index = pd.read_csv('../data/index/NL1_2015.csv', index_col=0,names=['name'])
index


Out[175]:
name
22372 Tom van Hyfte
37217 Pär Hansson
51969 Marcel Appiah
56379 Erik Quekel
57333 Jergé Hoefdraad
58342 Sheran Yeini
60289 Rajko Brezancic
64511 Jeroen Tesselaar
75219 Jasper Heusinkveld
78818 Dario Dumic
88212 Nigel Hasselbaink
95759 Kasper Larsen
96043 Mikael Dyrestam
96778 Georgios Katsikas
110755 Karim Tarfi
110911 Kosuke Ota
119066 Bryan Smeets
126770 Dominik Masek
129587 Willem Ofori
144244 Pantelis Hatzidiakos
159894 Kevin van Veen
161314 Wojciech Golla
164837 Kristopher Vida
169362 Luka Zahovic
170856 Boban Lazic
171820 Tomi Juric
180843 Paul Gladon
182909 Louis Nganioni
187185 Nassir El Aissati
187191 Farshad Noor
... ...
320825 Delvechio Blackson
320828 Mark Bruintjes
320829 Jeroen Buitenhuis
320839 Pim ten Have
324702 Jan Lammers
324921 Youssef El Jebli
326460 Samet Bulut
328105 Tim Blättler
328959 Jeroen van der Lely
330931 Brem Soumaoro
335400 Nick Wolters
336303 Bart Straalman
337670 Alessio Da Cruz
337673 Julian Lelieveld
342702 Michel Vlap
347038 Thomas Oude Kotte
350193 Dennis van der Heijden
354587 Levi García
356537 Levi Opdam
360517 Jay-Roy Grot
374698 Guy Smit
376709 Rowdy van der Putten
384807 Thomas Agyepong
387047 Danilho Doekhi
388636 Vincent Schmidt
393239 Mitchell van Bergen
396106 Thomas Kok
403839 Maecky Ngombo
408438 Amusah Abubakar
411287 Justin Hoogma

123 rows × 1 columns


In [217]:
league='NL1'
season=2015
filename=league+'_'+str(season)
personalFile='../data/personal/'+filename+'.csv'
transfersFile='../data/transfers/'+filename+'.csv'
if os.path.isfile(personalFile) and os.path.isfile(transfersFile) \
    and (not 'error' in pd.read_csv(personalFile).columns) \
    and (not 'error' in pd.read_csv(transfersFile).columns):
    print str(datetime.datetime.now()), filename, "files already complete"
else:
    print str(datetime.datetime.now()), "retrieving", filename
    index = pd.read_csv('../data/index/'+league+'_'+str(season)+'.csv', index_col=0,names=['name'])
    dataList=[]
    transfersList=[]
    for id in [22372,51969,37217]:
        try:
            bs = getBsByPlayerId(id)
        except Exception as e:
            print "error", e
        try:
            data = getPlayerDataFromBs(bs)
            data.update({'id':id})
        except Exception as e:
            print "error", e
            data = {'id':id, 'error': e}
        dataList.append(data)
        try:
            transfers = getTransfersFromBs(bs)
            [transfer.update({'id':id}) for transfer in transfers]
        except Exception as e:
            transfers = [{'id':id, 'error': e}]
        transfersList.append(transfers)
DataFrame(dataList).to_csv(personalFile,index=False,encoding='utf-8')
pd.concat([DataFrame(transfers) for transfers in transfersList], ignore_index=True).to_csv(transfersFile,index=False,encoding='utf-8')
print str(datetime.datetime.now()), "done"


2016-05-21 02:12:05.431244 NL1_2015 files already complete
2016-05-21 02:12:05.445932 done

In [222]:
df = pd.read_csv(transfersFile)
df.index= df['id']
df


Out[222]:
date fee fromCountry fromTeamId fromTeamName id mv playerName season toCountry toTeamId toTeamName
id
22372 Jul 1, 2014 Free transfer Netherlands 384 MVV 22372 275 Th. € Tom van Hyfte 14/15 Netherlands 192 Roda JC
22372 Jul 1, 2009 Free transfer Belgium 3134 KFC VW Hamme 22372 200 Th. € Tom van Hyfte 09/10 Netherlands 384 MVV
22372 Jul 1, 2004 75 Th. € Belgium 3038 RSC Anderlecht U19 22372 - Tom van Hyfte 04/05 Belgium 3134 KFC VW Hamme
37217 Jan 25, 2016 Free transfer Sweden 699 Helsingborgs IF 37217 1,50 Mill. € Pär Hansson 15/16 Netherlands 234 Feyenoord
37217 Dec 1, 2008 End of loan Sweden 3625 Ängelholm 37217 75 Th. € Pär Hansson 08/09 Sweden 699 Helsingborgs IF
37217 Feb 1, 2008 Loan Sweden 699 Helsingborgs IF 37217 75 Th. € Pär Hansson 07/08 Sweden 3625 Ängelholm
37217 Dec 1, 2007 End of loan Sweden 3625 Ängelholm 37217 75 Th. € Pär Hansson 07/08 Sweden 699 Helsingborgs IF
37217 Feb 1, 2006 Loan Sweden 699 Helsingborgs IF 37217 75 Th. € Pär Hansson 05/06 Sweden 3625 Ängelholm
37217 Jan 1, 2006 - Sweden 8659 Helsingborg U19 37217 - Pär Hansson 05/06 Sweden 699 Helsingborgs IF
37217 Jan 1, 2002 ? Sweden 23491 Vejbyslätts IF 37217 - Pär Hansson 01/02 Sweden 8659 Helsingborg U19
51969 Jul 1, 2016 Free transfer Netherlands 467 NEC Nijmegen 51969 500 Th. € Marcel Appiah 16/17 NaN 75 Unknown
51969 Jul 26, 2014 Free transfer Germany 10 Arm. Bielefeld 51969 500 Th. € Marcel Appiah 14/15 Netherlands 467 NEC Nijmegen
51969 Nov 12, 2010 - Germany 831 A. Bielefeld II 51969 100 Th. € Marcel Appiah 10/11 Germany 10 Arm. Bielefeld
51969 Jul 1, 2008 Free transfer Germany 1000 TSG Sprockhövel 51969 - Marcel Appiah 08/09 Germany 831 A. Bielefeld II
51969 Jul 1, 2007 Free transfer Germany 9604 Wattenschd. U19 51969 - Marcel Appiah 07/08 Germany 1000 TSG Sprockhövel
51969 Jul 1, 2005 Free transfer Germany 21085 VfL Bochum U17 51969 - Marcel Appiah 05/06 Germany 9604 Wattenschd. U19
51969 Jan 1, 2005 Free transfer Germany 37977 TSG SH U17 51969 - Marcel Appiah 04/05 Germany 21085 VfL Bochum U17
51969 Jul 1, 2004 Free transfer Germany 21061 Schalke 04 U17 51969 - Marcel Appiah 04/05 Germany 37977 TSG SH U17
51969 Jul 1, 2003 - Germany 11884 Schalke 04 Jgd. 51969 - Marcel Appiah 03/04 Germany 21061 Schalke 04 U17
56379 Jul 1, 2015 Free transfer Netherlands 404 FC Den Bosch 56379 200 Th. € Erik Quekel 15/16 Netherlands 642 De Graafschap
56379 Jun 30, 2015 End of loan Netherlands 9715 PSV U21 56379 200 Th. € Erik Quekel 14/15 Netherlands 404 FC Den Bosch
56379 Feb 2, 2015 Loan Netherlands 404 FC Den Bosch 56379 225 Th. € Erik Quekel 14/15 Netherlands 9715 PSV U21
56379 Aug 29, 2013 Free transfer Netherlands 500 Helmond Sport 56379 150 Th. € Erik Quekel 13/14 Netherlands 404 FC Den Bosch
56379 Jul 1, 2011 Free transfer Netherlands 1455 FC Dordrecht 56379 225 Th. € Erik Quekel 11/12 Netherlands 500 Helmond Sport
56379 Jul 1, 2010 Free transfer Netherlands 1228 FC Oss 56379 250 Th. € Erik Quekel 10/11 Netherlands 1455 FC Dordrecht
56379 Jul 1, 2006 - Netherlands 24371 TOP Oss U19 56379 - Erik Quekel 06/07 Netherlands 1228 TOP Oss
57333 Jul 1, 2015 Free transfer Netherlands 723 Almere City 57333 200 Th. € Jergé Hoefdraad 15/16 Netherlands 133 SC Cambuur
57333 Nov 6, 2013 - Netherlands 515 Unattached 57333 175 Th. € Jergé Hoefdraad 13/14 NaN 723 Almere City
57333 Jul 1, 2013 - Netherlands 1434 Telstar 57333 175 Th. € Jergé Hoefdraad 13/14 NaN 515 Unattached
57333 Jul 1, 2011 Free transfer Netherlands 723 Almere City 57333 125 Th. € Jergé Hoefdraad 11/12 Netherlands 1434 Telstar
... ... ... ... ... ... ... ... ... ... ... ... ...
342702 Jul 1, 2015 - Netherlands 4779 Heerenveen U19 342702 - Michel Vlap 15/16 Netherlands 306 SC Heerenveen
342702 Jul 1, 2014 - Netherlands 8521 Heerenveen U17 342702 - Michel Vlap 14/15 Netherlands 4779 Heerenveen U19
347038 Jul 1, 2016 - Netherlands 36892 Vitesse U21 347038 50 Th. € Thomas Oude Kotte 16/17 Netherlands 499 Vitesse
347038 Jul 1, 2015 - Netherlands 7465 Vitesse U19 347038 - Thomas Oude Kotte 15/16 Netherlands 36892 Vitesse U21
350193 Feb 14, 2016 - Netherlands 10709 ADO U19 350193 50 Th. € Dennis van der Heijden 15/16 Netherlands 1268 ADO Den Haag
350193 Jul 1, 2014 - Netherlands 36112 ADO U17 350193 - Dennis van der Heijden 14/15 Netherlands 10709 ADO U19
350193 Jul 1, 2012 Free transfer Netherlands 44863 Dordrecht Youth 350193 - Dennis van der Heijden 12/13 Netherlands 36112 ADO U17
354587 Nov 20, 2015 ? Trinidad and Tobago 37569 Central FC 354587 100 Th. € Levi García 15/16 Netherlands 1090 AZ Alkmaar
354587 Jul 1, 2014 Free transfer Trinidad and Tobago 37572 T&TEC SC 354587 - Levi García 14/15 Trinidad and Tobago 37569 Central FC
356537 Jul 1, 2015 - Netherlands 7460 AZ Alkmaar U19 356537 - Levi Opdam 15/16 Netherlands 11368 AZ Alkmaar U21
360517 Jul 1, 2015 - Netherlands 7463 NEC/FC Oss U19 360517 - Jay-Roy Grot 15/16 Netherlands 467 NEC Nijmegen
360517 Jan 1, 2015 - Netherlands 32800 NEC/Oss Youth 360517 - Jay-Roy Grot 14/15 Netherlands 7463 NEC/FC Oss U19
374698 Jul 1, 2015 Free transfer Netherlands 7465 Vitesse U19 374698 - Guy Smit 15/16 Netherlands 11154 NEC U21
376709 Jul 1, 2015 Free transfer Netherlands 50220 Feyenoord U18 376709 - Rowdy van der Putten 15/16 Netherlands 20303 SC Cambuur U21
384807 Jun 30, 2016 End of loan Netherlands 317 FC Twente 384807 400 Th. € Thomas Agyepong 15/16 England 9265 Man City U21
384807 Aug 1, 2015 Loan England 9265 Man City U21 384807 - Thomas Agyepong 15/16 Netherlands 317 FC Twente
384807 Jan 1, 2015 ? Ghana 40902 Right to Dream 384807 - Thomas Agyepong 14/15 England 9265 Man City U21
387047 Jul 1, 2016 Free transfer Netherlands 14350 Excelsior U19 387047 50 Th. € Danilho Doekhi 16/17 Netherlands 8817 Ajax U21
387047 Jul 1, 2015 - Netherlands 50204 Excelsior Jeugd 387047 - Danilho Doekhi 15/16 Netherlands 14350 Excelsior U19
388636 Jul 1, 2015 - Netherlands 6200 FC Twente U19 388636 - Vincent Schmidt 15/16 Netherlands 18026 FC Twente U21
393239 Jul 1, 2016 - Netherlands 45691 Vitesse U17 393239 100 Th. € Mitchell van Bergen 16/17 Netherlands 7465 Vitesse U19
393239 Jul 1, 2015 Free transfer Netherlands 32807 Willem II Youth 393239 - Mitchell van Bergen 15/16 Netherlands 45691 Vitesse U17
396106 Jul 1, 2015 - Netherlands 53474 Willem II U17 396106 - Thomas Kok 15/16 Netherlands 4778 Willem II U19
403839 Jul 1, 2016 Free transfer Netherlands 192 Roda JC 403839 600 Th. € Maecky Ngombo 16/17 NaN 75 Unknown
403839 Nov 27, 2015 - Netherlands 20302 Roda JC U21 403839 - Maecky Ngombo 15/16 Netherlands 192 Roda JC
403839 Jul 1, 2015 Free transfer Belgium 24394 Standard Res. 403839 - Maecky Ngombo 15/16 Netherlands 20302 Roda JC U21
403839 Jul 1, 2014 - Belgium 8164 Standard U19 403839 - Maecky Ngombo 14/15 Belgium 24394 Standard Res.
408438 Mar 1, 2016 - Netherlands 515 Unattached 408438 50 Th. € Amusah Abubakar 15/16 NaN 4778 Willem II U19
411287 Jan 1, 2016 ? Netherlands 6200 FC Twente U19 411287 - Justin Hoogma 15/16 Netherlands 1304 Heracles Almelo
411287 Jul 1, 2014 - Netherlands 18343 FC Twente U17 411287 - Justin Hoogma 14/15 Netherlands 6200 FC Twente U19

404 rows × 12 columns


In [204]:
DataFrame(dataList)
#pd.concat([DataFrame(transfers) for transfers in transfersList], ignore_index=True)


Out[204]:
Age Contract until Country of birth Current club Current club id Date of birth Date of last contract extension Detailed position Display name Foot ... Name in home country National team id Nationality Place of birth Player's agent Player's agent id Position birthDate id website
0 30 30.06.2018 Belgium Roda JC Kerkrade 192 Apr 28, 1986 Sep 4, 2015 Main position:Central Midfield Side position: ... Tom van Hyfte left ... NaN NaN Belgium Gent Forza Sports Group 2553 Midfield - Central Midfield Apr 28, 1986 (30) 22372 NaN
1 29 30.06.2017 Sweden Feyenoord Rotterdam 234 Jun 22, 1986 NaN Main position:Keeper Pär Hansson right ... Pär Johan Åke Hansson 3557 Sweden Vejbystrand Global Soccer Management 786 Goalkeeper Jun 22, 1986 (29) 37217 NaN
2 28 30.06.2016 Germany NEC Nijmegen 467 Mar 26, 1988 Jun 26, 2015 Main position:Right-Back Side position: Centre... Marcel Appiah right ... NaN NaN Germany   Ghana Schwelm neblung sportsnetwork 175 Defence - Right-Back Mar 26, 1988 (28) 51969 NaN
3 29 30.06.2017 Netherlands De Graafschap Doetinchem 642 Apr 16, 1987 NaN Main position:Centre Forward Side position: Ri... Erik Quekel NaN ... NaN NaN Netherlands Den Bosch NaN NaN Striker - Centre Forward Apr 16, 1987 (29) 56379 NaN
4 29 30.06.2016 Netherlands SC Cambuur-Leeuwarden 133 Jul 17, 1986 NaN Main position:Left Wing Side position: Attacki... Jergé Hoefdraad left ... NaN NaN Netherlands   Suriname Amsterdam NaN NaN Striker - Left Wing Jul 17, 1986 (29) 57333 https://www.jergehoefdraad.nl
5 29 30.06.2019 Israel Vitesse Arnhem 499 Dec 8, 1986 NaN Main position:Defensive Midfield Side position... Sheran Yeini right ... שרן ייני‎ 5547 Israel Tel Aviv Top Sport Services 231 Midfield - Defensive Midfield Dec 8, 1986 (29) 58342 NaN
6 26 30.06.2018 Jugoslawien (SFR) AZ Alkmaar 1090 Aug 21, 1989 NaN Main position:Left-Back Side position: Left Wing Rajko Brezancic left ... Рајко Брежанчић 9566 Serbia   Bosnia-Herzegovina Vlasenica GET11 AG 2208 Defence - Left-Back Aug 21, 1989 (26) 60289 NaN

7 rows × 23 columns


In [ ]:
new BeautifulSoup()

In [56]:
for season in range(2015,2016):
    for league in ['GB1']:
        filename = league + "_" + str(season) + ".xls"
        if os.path.isfile(filename):
            print str(datetime.datetime.now()), filename, "already exists"
        else:
            print str(datetime.datetime.now()), "retrieving", filename
            DataFrame([player for club in getClubsByLeagueId(league,season) \
                                             for player in getPlayersByClubId(club['clubId'],season)]).to_excel(filename)
            print str(datetime.datetime.now()), "done"


GB1_2015.xls already exists

In [69]:
str(datetime.datetime.now())


Out[69]:
'2016-05-19 23:30:41.259962'

In [ ]:
#epl = [getPlayerData(player["playerId"]) for club in getClubsByLeagueId('GB1') for player in getPlayersByClubId(club["clubId"])]
bundesliga = [getPlayerData(player["playerId"]) for club in getClubsByLeagueId('L1')[:2] for player in getPlayersByClubId(club["clubId"])]
DataFrame(bundesliga).to_excel("bundesliga.xls")

In [22]:
clubs=[]
for league in youthLeagues+tier1+tier2+tier3:
    result=getClubsByLeagueId(league)
    [club.update({'league': league}) for club in result]
    clubs.extend(result)

In [21]:
clubs

In [23]:
DataFrame(clubs)


Out[23]:
clubId league name
0 9249 GB21 Arsenal FC U21
1 9252 GB21 Liverpool FC U21
2 9261 GB21 Everton FC U21
3 39341 GB21 Leicester City U21
4 39336 GB21 Brighton & Hove Albion U21
5 33616 GB21 Swansea City U21
6 9265 GB21 Manchester City U21
7 9267 GB21 West Ham United U21
8 9250 GB21 Chelsea FC U21
9 24226 GB21 Wolverhampton Wanderers U21
10 24219 GB21 Stoke City U21
11 33614 GB21 Norwich City U21
12 36123 GB21 FC Reading U21
13 9251 GB21 Manchester United U21
14 9256 GB21 Middlesbrough FC U21
15 9254 GB21 Tottenham Hotspur U21
16 39337 GB21 Derby County U21
17 9253 GB21 Newcastle United U21
18 9258 GB21 Blackburn Rovers U21
19 9262 GB21 Fulham FC U21
20 9264 GB21 Sunderland AFC U21
21 12124 GB21 Aston Villa U21
22 29990 GB21 West Bromwich Albion U21
23 36546 GB21 Southampton FC U21
24 5343 AJ1 VfL Wolfsburg U19
25 26621 AJ1 RasenBallsport Leipzig U19
26 2491 AJ1 SV Werder Bremen U19
27 2663 AJ1 Hannover 96 U19
28 9196 AJ1 Eintracht Braunschweig U19
29 8663 AJ1 FC Carl Zeiss Jena U19
... ... ... ...
812 13562 IT3B Savona FBC
813 1307 IT3B US Pistoiese 1921
814 14888 IT3B US Città di Pontedera
815 4111 IT3B AC Rimini 1912
816 2250 IT3B AC Prato
817 4153 IT3B L'Aquila Calcio
818 27525 IT3B Santarcangelo Calcio
819 4159 IT3B Carrarese Calcio
820 21474 IT3B SS Maceratese
821 36745 IT3B AC Tuttocuoio
822 1253 IT3B AS Lucchese Libertas
823 22302 IT3B Lupa Roma FC
824 1627 IT3C Calcio Catania
825 1005 IT3C US Lecce
826 704 IT3C Foggia Calcio
827 4171 IT3C Benevento Calcio
828 4106 IT3C Casertana FC
829 5587 IT3C SS Juve Stabia
830 18950 IT3C SS Matera Calcio
831 4031 IT3C Cosenza Calcio
832 1104 IT3C ACR Messina
833 33734 IT3C SS Monopoli 1966
834 10670 IT3C AS Melfi
835 4085 IT3C Fidelis Andria
836 4097 IT3C US Catanzaro
837 31255 IT3C SS Akragas Città dei Templi
838 22330 IT3C SS Ischia Isolaverde
839 19164 IT3C Paganese Calcio 1926
840 4558 IT3C AS Martina Franca
841 32013 IT3C AS Lupa Castelli Romani

842 rows × 3 columns


In [12]:
a = [1,2,3]
a.extend([4,5,6])
a


Out[12]:
[1, 2, 3, 4, 5, 6]

In [ ]:
DataFrame(getTransfersByPlayerId(121483))

In [ ]:
DataFrame(getClubsByLeagueId('GB1',2010))  #.sort_values(by="clubId")

In [ ]:
DataFrame(getPlayersByClubId(418,2005))