In [1]:

    
# Print the author, the date, and the Python/IPython versions used for this run
# (requires the `watermark` IPython extension to be installed).
%load_ext watermark
%watermark -a 'Sebastian Raschka' -v -d









    



Sebastian Raschka 03/01/2015 

CPython 3.4.2
IPython 2.3.1

Collecting Premier League Data

Sections

dreamteamfc.com

[back to top]

Getting General Player Statistics

[back to top]



In [1]:

    
import pandas as pd
from bs4 import BeautifulSoup
import bs4
import requests



In [3]:

    
# Scrape the dreamteamfc.com player table into a dict keyed by player name

url = 'https://www.dreamteamfc.com/statistics/players/ALL/'
r = requests.get(url)
# html5lib copes with malformed markup better than lxml does
soup = BeautifulSoup(r.text, 'html5lib')

player_dict = {}
name_list = []

for td in soup.find_all('td', class_='tabName'):
    # Keep only the text that follows the last 'Statistics' marker in the cell
    name = td.text.split('Statistics')[-1].strip()
    if not name:
        continue
    name_list.append(name)
    # The remaining columns are the tag siblings that follow the name cell
    res = [sib.text for sib in td.next_siblings if isinstance(sib, bs4.element.Tag)]
    position, team, vfm, value, points = res
    # Drop the 'm' unit characters so the value can be cast to float later
    value = value.strip('m')
    player_dict[name] = [name, position, team, vfm, value, points]

print('Found: %s' % len(name_list))
print(name_list[-1])









    



Found: 401
O'Brien, Joey



In [4]:

    
# Build a DataFrame (one row per player, indexed by name) from the scraped dict

df = pd.DataFrame.from_dict(player_dict, orient='index')
df.columns = ['name', 'position', 'team', 'vfm', 'value', 'pts']
# The scraped values are strings; cast the numeric columns in one pass
df = df.astype({'vfm': float, 'value': float, 'pts': int})
df.tail()









    Out[4]:






  
    
      
      name
      position
      team
      vfm
      value
      pts
    
  
  
    
      Sigurdsson, Gylfi
       Sigurdsson, Gylfi
       MID
       SWA
       28.67
       3.0
       86
    
    
      van Ginkel, Marco
       van Ginkel, Marco
       MID
       CHE
        0.00
       2.0
        0
    
    
      Chamakh, Marouane
       Chamakh, Marouane
       STR
       CRY
        2.67
       1.5
        4
    
    
      Ince, Tom
               Ince, Tom
       MID
       HUL
        6.50
       2.0
       13
    
    
      Ireland, Stephen
        Ireland, Stephen
       MID
       STO
        4.00
       1.0
        4



In [5]:

    
# Summary statistics (count, mean, std, quartiles, ...) for the player table
df.describe()

Getting Injuries and Cards Information

[back to top]



In [6]:

    
# Add empty-string placeholder columns for the injuries-and-cards information
for col in ('status', 'description', 'returns'):
    df[col] = pd.Series('', index=df.index)



In [7]:

    
# Scrape the injuries-and-cards table and copy the details onto matching player rows

url = 'https://www.dreamteamfc.com/statistics/injuries-and-cards/ALL/'
r = requests.get(url)
soup = BeautifulSoup(r.text, 'html5lib')

name_list = []

for td in soup.find_all('td', class_='tabName2'):
    # Keep only the text that follows the last 'stats' marker in the cell
    name = td.text.split('stats')[-1].strip()
    if not name:
        continue
    name_list.append(name)
    res = [sib.text for sib in td.next_siblings if isinstance(sib, bs4.element.Tag)]
    position, team, status, description, returns = res
    # df is indexed by player name; a name that is not in df selects no rows
    is_player = df.index == name
    df.loc[is_player, ['status', 'description', 'returns']] = status, description, returns

print('Found: %s' % len(name_list))
print(name_list[-1])









    



Found: 81
Fernando



In [8]:

    
# Show the last rows, now including the status / description / returns columns
df.tail()









    Out[8]:






  
    
      
      name
      position
      team
      vfm
      value
      pts
      status
      description
      returns
    
  
  
    
      Sigurdsson, Gylfi
       Sigurdsson, Gylfi
       MID
       SWA
       28.67
       3.0
       86
                  
                                                        
                 
    
    
      van Ginkel, Marco
       van Ginkel, Marco
       MID
       CHE
        0.00
       2.0
        0
       Unavailable
                     Joined AC Milan on season-long loan
       25/05/2015
    
    
      Chamakh, Marouane
       Chamakh, Marouane
       STR
       CRY
        2.67
       1.5
        4
           Injured
       Sustained in the encounter with Stoke on 13/12...
       26/12/2014
    
    
      Ince, Tom
               Ince, Tom
       MID
       HUL
        6.50
       2.0
       13
                  
                                                        
                 
    
    
      Ireland, Stephen
        Ireland, Stephen
       MID
       STO
        4.00
       1.0
        4
          Doubtful
               Rated a doubt for 22/12 visit of Chelsea.
       01/01/2015

Getting Player Form Information

[back to top]



In [9]:

    
# Add zero-initialised columns for the form-guide points
for col in ('month_pts', 'week_pts'):
    df[col] = pd.Series(0, index=df.index)



In [11]:

    
# Scrape the form guide and copy month / week points onto the matching player rows

url = 'https://www.dreamteamfc.com/statistics/form-guide/all'
r = requests.get(url)
# Fail loudly on an HTTP error instead of parsing an error page
r.raise_for_status()
soup = BeautifulSoup(r.text, 'html5lib')

name_list = []
skipped = []  # names whose point cells were missing or not numeric

for td in soup.find_all('td', class_='tabName'):
    name = td.text.strip()
    if not name:
        continue
    name_list.append(name)

    res = [i.text for i in td.next_siblings if isinstance(i, bs4.element.Tag)]
    try:
        # The last two cells of the row are read as month and week points
        month_pts, week_pts = float(res[-2]), float(res[-1])
    except (ValueError, IndexError):
        # Best effort: keep the row's existing values, but remember the name
        # so that dropped rows are reported instead of vanishing silently
        skipped.append(name)
        continue
    df.loc[df.index == name, ['month_pts', 'week_pts']] = month_pts, week_pts

print('Found: %s' % len(name_list))
if name_list:
    print(name_list[-1])
if skipped:
    print('Skipped (unparseable points): %s' % len(skipped))









    



Found: 401
O'Brien, Joey



In [13]:

    
# Put the points columns together, ahead of the status columns

column_order = ['name', 'position', 'team', 'vfm', 'value', 'pts', 'month_pts',
                'week_pts', 'status', 'description', 'returns']
df = df[column_order]

df.tail()









    Out[13]:






  
    
      
      name
      position
      team
      vfm
      value
      pts
      month_pts
      week_pts
      status
      description
      returns
    
  
  
    
      Sigurdsson, Gylfi
       Sigurdsson, Gylfi
       MID
       SWA
       28.67
       3.0
       86
       28
       5
                  
                                                        
                 
    
    
      van Ginkel, Marco
       van Ginkel, Marco
       MID
       CHE
        0.00
       2.0
        0
        0
       0
       Unavailable
                     Joined AC Milan on season-long loan
       25/05/2015
    
    
      Chamakh, Marouane
       Chamakh, Marouane
       STR
       CRY
        2.67
       1.5
        4
       -1
       0
           Injured
       Sustained in the encounter with Stoke on 13/12...
       26/12/2014
    
    
      Ince, Tom
               Ince, Tom
       MID
       HUL
        6.50
       2.0
       13
        0
       0
                  
                                                        
                 
    
    
      Ireland, Stephen
        Ireland, Stephen
       MID
       STO
        4.00
       1.0
        4
       -1
       0
          Doubtful
               Rated a doubt for 22/12 visit of Chelsea.
       01/01/2015

Saving the Data to CSV

[back to top]



In [13]:

    
# Getting the current time stamp for the data

from datetime import datetime

url = 'https://www.dreamteamfc.com/statistics/players/ALL/'
r  = requests.get(url)
# Fail loudly on an HTTP error instead of parsing an error page
r.raise_for_status()
data = r.text
# Name the parser explicitly (as the other cells do); without it bs4 picks
# whichever parser happens to be installed and emits a warning
soup = BeautifulSoup(data, 'html5lib')

update_info = soup.find('li', class_='pointsupdateinfo')
if update_info is None:
    raise ValueError("no 'pointsupdateinfo' element found on %s" % url)

# The last whitespace-separated token of the element is read as a dd/mm/yyyy date.
# Parse it with the slashes in place: removing them first makes dates that are
# not zero-padded (e.g. 1/2/2015) ambiguous.
raw_date = update_info.text.split()[-1].strip()
d = datetime.strptime(raw_date, '%d/%m/%Y').date()
date = d.strftime('%Y%m%d')
print(date)

20141220



In [14]:

    
# Write the player table to a CSV whose file name carries the `date` string;
# the index is not written (the player name is already in the 'name' column).
df.to_csv('../data/dreamteamfc_%s.csv' % date, index=False)

espnfc.com

[back to top]

Getting Team Ranks and Stats

[back to top]



In [2]:

    
import pandas as pd
from bs4 import BeautifulSoup
import bs4
import requests



In [13]:

    
# Scrape the ESPN FC league table into a dict: team name -> [rank, stat, stat, ...]

url = 'http://www.espnfc.com/barclays-premier-league/23/table'
r = requests.get(url)
# html5lib copes with malformed markup better than lxml does
soup = BeautifulSoup(r.text, 'html5lib')

team_dict = {}

for td in soup.find_all('td', class_='pos'):
    rank = int(td.text)
    # Following tag siblings hold the row's cells; cells that contain only a
    # non-breaking space are dropped
    res = [sib.text for sib in td.next_siblings
           if isinstance(sib, bs4.element.Tag) and sib.text != '\xa0']
    team_name = res[0].strip()
    values = list(map(int, res[1:]))
    team_dict[team_name] = [rank] + values

Column legend:

Pos: POSITION
P: GAMES PLAYED
W: WINS
D: DRAWS
L: LOSSES
F: GOALS FOR
A: GOALS AGAINST
GD: GOAL DIFFERENCE
PTS: POINTS

suffixes:

_ov: OVERALL
_hm: HOME GAMES
_aw: AWAY GAMES



In [14]:

    
# Turn the scraped league table into a DataFrame ordered by league position

cols = ['Pos','P_ov','W_ov','D_ov','L_ov','F_ov','A_ov',
        'W_hm','D_hm','L_hm','F_hm','A_hm', 'W_aw',
        'D_aw','L_aw','F_aw','A_aw','GD','PTS']

df = pd.DataFrame.from_dict(team_dict, orient='index')
df.columns = cols
# DataFrame.sort() no longer exists in pandas; sort_values() is its replacement
df = df.sort_values('Pos')
# Expose the team name (the index) as the first regular column
df['team'] = df.index
df = df[['team']+cols]
df









    Out[14]:






  
    
      
      team
      Pos
      P_ov
      W_ov
      D_ov
      L_ov
      F_ov
      A_ov
      W_hm
      D_hm
      L_hm
      F_hm
      A_hm
      W_aw
      D_aw
      L_aw
      F_aw
      A_aw
      GD
      PTS
    
  
  
    
      Chelsea
                    Chelsea
        1
       20
       14
        4
        2
       44
       19
       9
       0
       0
       22
        3
       5
       4
       2
       22
       16
       25
       46
    
    
      Manchester City
            Manchester City
        2
       20
       14
        4
        2
       44
       19
       7
       2
       1
       20
        9
       7
       2
       1
       24
       10
       25
       46
    
    
      Manchester United
          Manchester United
        3
       20
       10
        7
        3
       34
       20
       8
       1
       1
       22
        7
       2
       6
       2
       12
       13
       14
       37
    
    
      Southampton
                Southampton
        4
       20
       11
        3
        6
       34
       15
       7
       2
       2
       24
        7
       4
       1
       4
       10
        8
       19
       36
    
    
      Tottenham Hotspur
          Tottenham Hotspur
        5
       20
       10
        4
        6
       29
       27
       5
       2
       4
       16
       13
       5
       2
       2
       13
       14
        2
       34
    
    
      Arsenal
                    Arsenal
        6
       20
        9
        6
        5
       34
       25
       5
       3
       1
       18
       10
       4
       3
       4
       16
       15
        9
       33
    
    
      West Ham United
            West Ham United
        7
       20
        9
        5
        6
       31
       24
       6
       2
       3
       16
       10
       3
       3
       3
       15
       14
        7
       32
    
    
      Liverpool
                  Liverpool
        8
       20
        8
        5
        7
       28
       27
       4
       5
       2
       15
       11
       4
       0
       5
       13
       16
        1
       29
    
    
      Swansea City
               Swansea City
        9
       20
        8
        5
        7
       25
       24
       6
       2
       2
       15
        7
       2
       3
       5
       10
       17
        1
       29
    
    
      Newcastle United
           Newcastle United
       10
       20
        7
        6
        7
       25
       31
       5
       3
       2
       16
       14
       2
       3
       5
        9
       17
       -6
       27
    
    
      Stoke City
                 Stoke City
       11
       20
        7
        5
        8
       22
       24
       4
       2
       4
       12
       12
       3
       3
       4
       10
       12
       -2
       26
    
    
      Aston Villa
                Aston Villa
       12
       20
        5
        7
        8
       11
       22
       2
       5
       3
        7
       11
       3
       2
       5
        4
       11
      -11
       22
    
    
      Everton
                    Everton
       13
       20
        5
        6
        9
       29
       33
       3
       3
       3
       16
       15
       2
       3
       6
       13
       18
       -4
       21
    
    
      Sunderland
                 Sunderland
       14
       20
        3
       11
        6
       18
       30
       1
       6
       3
       10
       15
       2
       5
       3
        8
       15
      -12
       20
    
    
      Hull City
                  Hull City
       15
       20
        4
        7
        9
       20
       26
       2
       3
       5
       10
       12
       2
       4
       4
       10
       14
       -6
       19
    
    
      Queens Park Rangers
        Queens Park Rangers
       16
       20
        5
        4
       11
       22
       35
       5
       4
       2
       18
       13
       0
       0
       9
        4
       22
      -13
       19
    
    
      West Bromwich Albion
       West Bromwich Albion
       17
       20
        4
        6
       10
       19
       29
       2
       3
       5
       13
       16
       2
       3
       5
        6
       13
      -10
       18
    
    
      Crystal Palace
             Crystal Palace
       18
       20
        3
        8
        9
       20
       30
       2
       2
       5
       10
       14
       1
       6
       4
       10
       16
      -10
       17
    
    
      Burnley
                    Burnley
       19
       20
        3
        8
        9
       17
       32
       2
       4
       4
        7
       12
       1
       4
       5
       10
       20
      -15
       17
    
    
      Leicester City
             Leicester City
       20
       20
        3
        5
       12
       19
       33
       1
       4
       4
       12
       15
       2
       1
       8
        7
       18
      -14
       14

Saving ESPN Data to CSV

[back to top]



In [12]:

    
# Write the league table to CSV without the index (the team name is already a column)
df.to_csv('../data/2014_epl_day_17/espn_20141222.csv', index=False)

Getting Top Scorers

[back to top]



In [10]:

    
# Scrape the top scorers into a DataFrame sorted by goals (highest first)

player_dict = {}

url = 'http://www.espnfc.com/barclays-premier-league/23/statistics/scorers'

r  = requests.get(url)
# Fail loudly on an HTTP error instead of parsing an error page
r.raise_for_status()
soup = BeautifulSoup(r.text, 'html5lib')
# Note: html5lib deals better with broken html than lxml

for td in soup.find_all('td', attrs={'headers': 'player'}):
    name = td.text
    # The two remaining non-blank cells of the row are the team and the goal count
    team, goals = [i.text for i in td.next_siblings if isinstance(i, bs4.element.Tag) and i.text!='\xa0']
    player_dict[name] = [team, int(goals)]

df_essc = pd.DataFrame.from_dict(player_dict, orient='index')
df_essc['name'] = df_essc.index
df_essc.columns = ['team', 'goals', 'name']
df_essc = df_essc[['name', 'team', 'goals']]
# DataFrame.sort() no longer exists in pandas; sort_values() is its replacement
df_essc = df_essc.sort_values('goals', ascending=False)
df_essc.head()









    Out[10]:






  
    
      
      name
      team
      goals
    
  
  
    
      Diego Costa
              Diego Costa
                   Chelsea
       17
    
    
      Sergio Agüero
            Sergio Agüero
           Manchester City
       14
    
    
      Charlie Austin
           Charlie Austin
       Queens Park Rangers
       13
    
    
      Alexis Sánchez
           Alexis Sánchez
                   Arsenal
       12
    
    
      Papiss Demba Cisse
       Papiss Demba Cisse
          Newcastle United
        9

Getting Top Assists

[back to top]



In [8]:

    
# Scrape the top assist providers into a DataFrame sorted by assists (highest first)

player_dict = {}

url = 'http://www.espnfc.com/barclays-premier-league/23/statistics/assists'

r  = requests.get(url)
# Fail loudly on an HTTP error instead of parsing an error page
r.raise_for_status()
soup = BeautifulSoup(r.text, 'html5lib')
# Note: html5lib deals better with broken html than lxml

for td in soup.find_all('td', attrs={'headers': 'player'}):
    name = td.text
    # The two remaining non-blank cells of the row are the team and the assist count
    team, assists = [i.text for i in td.next_siblings if isinstance(i, bs4.element.Tag) and i.text!='\xa0']
    player_dict[name] = [team, int(assists)]

df_esas = pd.DataFrame.from_dict(player_dict, orient='index')
df_esas['name'] = df_esas.index
df_esas.columns = ['team', 'assists', 'name']
df_esas = df_esas[['name', 'team', 'assists']]
# DataFrame.sort() no longer exists in pandas; sort_values() is its replacement
df_esas = df_esas.sort_values('assists', ascending=False)
df_esas.head()









    Out[8]:






  
    
      
      name
      team
      assists
    
  
  
    
      Cesc Fàbregas
          Cesc Fàbregas
               Chelsea
       15
    
    
      Gylfi Sigurdsson
       Gylfi Sigurdsson
          Swansea City
        8
    
    
      Leighton Baines
        Leighton Baines
               Everton
        8
    
    
      Stewart Downing
        Stewart Downing
       West Ham United
        7
    
    
      Dusan Tadic
            Dusan Tadic
           Southampton
        7

365stats.com

[back to top]

Getting Injury Data

[back to top]



In [1]:

    
import pandas as pd
from bs4 import BeautifulSoup
import bs4
import requests



In [2]:

    
# Scrape the 365stats injury table into a dict: "<rest of name>, <first token>" -> two cell texts

url = 'http://365stats.com/football/injuries'
r = requests.get(url)
# html5lib copes with malformed markup better than lxml does
soup = BeautifulSoup(r.text, 'html5lib')

injury_dict = {}

for td in soup.find_all('td', attrs={'nowrap': 'nowrap'}):
    name = td.text.split()
    # Move the first whitespace-separated token of the name behind the rest
    player_key = '%s, %s' % (' '.join(name[1:]), name[0])
    # Only the first two tag siblings after the name cell are kept
    following = [sib.text for sib in td.next_siblings if isinstance(sib, bs4.Tag)]
    injury_dict[player_key] = following[:2]



In [3]:

    
# Build the injury table, exposing the player name (the index) as the first column
df = pd.DataFrame.from_dict(injury_dict, orient='index')
df.columns = ['injury', 'returns']
df.insert(0, 'name', df.index)
df.tail()









    Out[3]:






  
    
      
      name
      injury
      returns
    
  
  
    
      Osman, L
         Osman, L
          Calf/Shin Injury
       no date
    
    
      Hibbert, T
       Hibbert, T
          Muscular Injury 
       2 Weeks
    
    
      Moses, V
         Moses, V
       Thigh Muscle Strain
       2 Weeks
    
    
      Taarabt, A
       Taarabt, A
       Groin/Pelvis Injury
       no date
    
    
      Ward, S
          Ward, S
         Ankle/Foot Injury
       no date

Saving 365stats Data to CSV

[back to top]



In [4]:

    
# Write the injury table to CSV.
# NOTE(review): unlike the other to_csv calls in this notebook, index=False is not
# passed here, so the index (the player name, which duplicates the 'name' column)
# is written as well -- confirm that this is intended.
df.to_csv('../data/2014_epl_day_17/365stats_injury_20141222.csv')

Transfermarkt.com

[back to top]

Getting Home and Away Teams

[back to top]



In [1]:

    
import pandas as pd
from bs4 import BeautifulSoup
import bs4
import requests



In [2]:

    
# Scrape the home and away teams of the fixtures tab from transfermarkt.com

url = 'http://www.transfermarkt.com/premier-league/startseite/wettbewerb/GB1'

# The request is sent through a session with browser-like User-Agent and Host headers
s = requests.Session()
s.headers['User-Agent'] = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/34.0.1847.116 Chrome/34.0.1847.116 Safari/537.36'
s.headers['Host'] = 'www.transfermarkt.com'
r = s.get(url)

# html5lib copes with malformed markup better than lxml does
soup = BeautifulSoup(r.text, 'html5lib')

# Choose the tab to read: start with tab 2, but switch to tab 3 when tab 2
# already contains 'ergebnis-link' anchors
tab = 'spieltagtabs-2'
div = soup.find('div', id=tab)
tit = div.find_all('a', class_='ergebnis-link')
if tit:
    tab = 'spieltagtabs-3'

# Collect the non-empty team cells; home and away cells differ only in their class string
home, away = [], []
div = soup.find('div', id=tab)
for css_class, teams in (('text-right no-border-rechts no-border-links', home),
                         ('no-border-links no-border-rechts', away)):
    for t in div.find_all('td', class_=css_class):
        team = t.text.strip()
        if team:
            teams.append(team)

df = pd.DataFrame({'home': home, 'away': away}, columns=['home', 'away'])
df









    Out[2]:






  
    
      
      home
      away
    
  
  
    
      0
      Norwich
      Manchester City
    
    
      1
      Manchester Utd.
      Crystal Palace
    
    
      2
      Bournemouth
      Swansea
    
    
      3
      Stoke City
      Southampton FC
    
    
      4
      West Ham
      Watford
    
    
      5
      Sunderland
      Everton
    
    
      6
      Arsenal FC
      West Brom
    
    
      7
      Aston Villa
      Spurs
    
    
      8
      Liverpool
      Chelsea FC
    
    
      9
      Leicester City
      Newcastle

Saving Home and Away Teams to CSV

[back to top]



In [11]:

    
# Write the home/away fixture table to CSV without the integer index
df.to_csv('../data/2014_epl_day_19/transfermarkt_20141227.csv', index=False)

premierleague.com

[back to top]



In [2]:

    
import pandas as pd
from bs4 import BeautifulSoup
import bs4
import requests



In [9]:

    
# Scrape the fixtures listed on the premierleague.com matchday page

url = 'http://www.premierleague.com/en-gb/matchday.html'
r  = requests.get(url)
# Fail loudly on an HTTP error instead of parsing an error page
r.raise_for_status()
soup = BeautifulSoup(r.text, 'html5lib')
# Note: html5lib deals better with broken html than lxml

home = []
away = []

for t in soup.find_all('td', attrs={'width': '30%'}):
    team = t.text.strip().split(' v ')
    print(team)
    # Record the pairing only when the cell really has the "<home> v <away>" shape
    if len(team) == 2:
        home.append(team[0])
        away.append(team[1])









    



['Spurs', 'Man Utd']
['Southampton', 'Chelsea']
['Aston Villa', 'Sunderland']
['Hull', 'Leicester']
['Man City', 'Burnley']
['QPR', 'Crystal Palace']
['Stoke', 'West Brom']
['West Ham', 'Arsenal']
['Newcastle', 'Everton']
['Leicester', 'Newcastle']
['Tranmere', 'Swansea']
['West Brom', 'Gateshead']
['Liverpool', 'Chelsea']
['Paris SG', 'Chelsea']
['BSC Young Boys', 'Everton']
['Liverpool', 'Besiktas']
['Spurs', 'Fiorentina']

telegraph.co.uk

[back to top]

Getting Current Week Points

[back to top]



In [1]:

    
import pandas as pd
from bs4 import BeautifulSoup
import bs4
import requests



In [15]:

    
url = 'https://fantasyfootball.telegraph.co.uk/premierleague/players/'
r = requests.get(url)
# html5lib copes with malformed markup better than lxml does
soup = BeautifulSoup(r.text, 'html5lib')

# Map each player (text of a 'first' cell) to the texts of the cells that follow it
player_dict = {}
for t in soup.find_all('td', class_='first'):
    player = t.text.strip()
    player_dict[player] = [sib.text for sib in t.next_siblings if isinstance(sib, bs4.Tag)]

# One row per player; name the scraped columns, then expose the index as 'name'
df = pd.DataFrame.from_dict(player_dict, orient='index')
df.columns = ['team', 'salary', 'pts/salary', 'week_pts', 'total_pts']
df.insert(0, 'name', df.index)

# Strip the currency sign and the ' m' unit characters, then cast the numeric columns
df['salary'] = [x.strip('£').strip(' m') for x in df['salary']]
df = df.astype({'salary': float, 'pts/salary': float,
                'week_pts': int, 'total_pts': int})

print(df.shape)
df.tail()









    



(548, 6)






    Out[15]:






  
    
      
      name
      team
      salary
      pts/salary
      week_pts
      total_pts
    
  
  
    
      Januzaj, A
        Januzaj, A
          Manchester United
       3.9
        3.8
        1
       15
    
    
      Grealish, J
       Grealish, J
                Aston Villa
       2.5
        4.0
        3
       10
    
    
      Anichebe, V
       Anichebe, V
       West Bromwich Albion
       4.0
        8.2
       10
       33
    
    
      Hibbert, T
        Hibbert, T
                    Everton
       2.2
        3.2
        0
        7
    
    
      Coutinho, P
       Coutinho, P
                  Liverpool
       4.4
       11.1
        2
       49

Getting 6-Week Points

[back to top]



In [16]:

    
# Add each player's 6-week points from the telegraph form guide

url = 'https://fantasyfootball.telegraph.co.uk/premierleague/formguide/'
r  = requests.get(url)
# Fail loudly on an HTTP error instead of parsing an error page
r.raise_for_status()
soup = BeautifulSoup(r.text, 'html5lib')
# Note: html5lib deals better with broken html than lxml

df['6week_pts'] = pd.Series(0, index=df.index)

for t in soup.find_all('td', class_='first'):
    player = t.text.strip()
    if player:
        # The value is read from the 'sixth last' cell of the same table row
        week6 = t.parent.find('td', class_='sixth last')
        # Convert the cell text to int so the column stays numeric instead of
        # receiving strings in an integer column
        df.loc[df['name'] == player, '6week_pts'] = int(week6.text)

df.tail()









    Out[16]:






  
    
      
      name
      team
      salary
      pts/salary
      week_pts
      total_pts
      6week_pts
    
  
  
    
      Januzaj, A
        Januzaj, A
          Manchester United
       3.9
        3.8
        1
       15
        2
    
    
      Grealish, J
       Grealish, J
                Aston Villa
       2.5
        4.0
        3
       10
        7
    
    
      Anichebe, V
       Anichebe, V
       West Bromwich Albion
       4.0
        8.2
       10
       33
       16
    
    
      Hibbert, T
        Hibbert, T
                    Everton
       2.2
        3.2
        0
        7
        4
    
    
      Coutinho, P
       Coutinho, P
                  Liverpool
       4.4
       11.1
        2
       49
       27

Saving telegraph.co.uk to CSV

[back to top]



In [39]:

    
# Write the telegraph player table to CSV without the index (the name is already a column)
df.to_csv('../data/2014_epl_day_20/telegraph_20141229.csv', index=False)

m.premierleague.com

[back to top]

Combined Form of Previous 6 Games

[back to top]



In [2]:

    
import pandas as pd
from bs4 import BeautifulSoup
import bs4
import requests



In [3]:

    
# Scrape the form-guide position of each club from m.premierleague.com

url = 'http://m.premierleague.com/en-gb/form-guide.html'
r  = requests.get(url)
# Fail loudly on an HTTP error instead of parsing an error page
r.raise_for_status()
soup = BeautifulSoup(r.text, 'html5lib')
# Note: html5lib deals better with broken html than lxml

N_TEAMS = 20  # number of clubs to collect

team_dict = {}

for d in soup.find_all('td', class_='col-pos'):
    # Stop as soon as N_TEAMS clubs are collected. The previous `> 20` test could
    # never fire while the dict held exactly 20 distinct clubs, so later rows
    # naming the same clubs would overwrite the positions already stored.
    if len(team_dict) >= N_TEAMS:
        break
    pos = d.text
    # The club name is in the first following sibling tag carrying the 'col-club' class
    for e in d.next_siblings:
        if isinstance(e, bs4.Tag) and 'col-club' in e.attrs.get('class', []):
            club = e.text
            team_dict[club] = pos
            break

df = pd.DataFrame.from_dict(team_dict, orient='index')

df.columns = ['position-last-6-games']
df['team'] = df.index
df.tail()









    Out[3]:






  
    
      
      position-last-6-games
      team
    
  
  
    
      West Brom
       15
            West Brom
    
    
      Hull
       12
                 Hull
    
    
      Southampton
        9
          Southampton
    
    
      Newcastle
       11
            Newcastle
    
    
      Crystal Palace
       18
       Crystal Palace

Saving m.premierleague.com to CSV

[back to top]



In [26]:

    
# Write the form-guide table to CSV without the index (the club name is already in 'team')
df.to_csv('../data/2014_epl_day_20/mpremierleague_20141230.csv', index=False)

fantasyfootballscout.co.uk

[back to top]

Predicted Line-Ups

[back to top]



In [31]:

    
import pandas as pd
from bs4 import BeautifulSoup
import bs4
import requests



In [85]:

    
# Scrape the predicted line-ups: one DataFrame column per team, one row per listed player

url = 'http://www.fantasyfootballscout.co.uk/team-news/'
r  = requests.get(url)
# Fail loudly on an HTTP error instead of parsing an error page
r.raise_for_status()
soup = BeautifulSoup(r.text, 'html5lib')
# Note: html5lib deals better with broken html than lxml


team_dict = {}

# Every <h2> inside an <li> is taken as a team name; the 'player-name' spans
# of that same <li> are that team's players
for li in soup.find_all('li'):
    for h2 in li.find_all('h2'):
        team = h2.text
        team_dict[team] = []
        for p in li.find_all('span', class_='player-name'):
            player = p.text
            team_dict[team].append(player)

# Wrap each list in a Series so that teams with fewer listed players are padded
# with NaN; plain lists of unequal length would make the constructor raise
df = pd.DataFrame({team: pd.Series(players, dtype=object)
                   for team, players in team_dict.items()})
df.tail()









    Out[85]:






  
    
      
      Arsenal
      Aston Villa
      Burnley
      Chelsea
      Crystal Palace
      Everton
      Hull City
      Leicester City
      Liverpool
      Manchester City
      Manchester United
      Newcastle United
      Queens Park Rangers
      Southampton
      Stoke City
      Sunderland
      Swansea City
      Tottenham Hotspur
      West Bromwich Albion
      West Ham United
    
  
  
    
      6 
                  Flamini
          Sánchez
        Jones
             Matic
         McArthur
          Barry
          Meyler
           Mahrez
          Lucas
       Fernandinho
          Carrick
             Tioté
        Henry
       Wanyama
                 Whelan
        Johnson
               Ki
       Bentaleb
        Gardner
         Nolan
    
    
      7 
                 Coquelin
        Cleverley
       Marney
           Willian
       Joe Ledley
        Barkley
       Livermore
       Drinkwater
         Moreno
           Y Touré
           Rooney
       Ayoze Pérez
       Barton
         Davis
       Mame Biram Diouf
        Larsson
             Dyer
         Lamela
       Morrison
         Noble
    
    
      8 
       Oxlade-Chamberlain
         N'Zogbia
         Boyd
             Oscar
          Bolasie
       Naismith
           Brady
            James
       Coutinho
             Nasri
            Young
           Sissoko
          Fer
          Mane
                Walters
          Gómez
       Sigurdsson
        Eriksen
          Brunt
       Downing
    
    
      9 
                  Sánchez
          Benteke
       Barnes
            Hazard
             Zaha
       Mirallas
           Aluko
            Vardy
       Sterling
             Silva
           Falcao
          Gouffran
       Zamora
         Tadic
             Arnautovic
        Wickham
        Routledge
         Chadli
        Dorrans
         Sakho
    
    
      10
                  Cazorla
       Agbonlahor
         Ings
       Diego Costa
         Campbell
         Lukaku
         Jelavic
            Ulloa
        Lallana
           Jovetic
       van Persie
         Armstrong
       Austin
         Pellè
                 Crouch
       Fletcher
             Bony
           Kane
       Berahino
       Carroll

Saving fantasyfootballscout.co.uk to CSV

[back to top]



In [86]:

    
# Write the line-up table (one column per team) to CSV without the integer index
df.to_csv('../data/epl_1314_21/fantasyfootballscout.csv', index=False)

	vfm	value	pts
count	401.000000	401.000000	401.000000
mean	11.185661	2.770574	29.581047
std	10.259686	1.416327	27.582405
min	-13.000000	1.000000	-13.000000
25%	3.600000	1.500000	9.000000
50%	9.330000	2.500000	24.000000
75%	15.850000	3.500000	43.000000
max	93.330000	7.500000	167.000000

	name	position	team	vfm	value	pts
Sigurdsson, Gylfi	Sigurdsson, Gylfi	MID	SWA	28.67	3.0	86
van Ginkel, Marco	van Ginkel, Marco	MID	CHE	0.00	2.0	0
Chamakh, Marouane	Chamakh, Marouane	STR	CRY	2.67	1.5	4
Ince, Tom	Ince, Tom	MID	HUL	6.50	2.0	13
Ireland, Stephen	Ireland, Stephen	MID	STO	4.00	1.0	4

	team	Pos	P_ov	W_ov	D_ov	L_ov	F_ov	A_ov	W_hm	D_hm	L_hm	F_hm	A_hm	W_aw	D_aw	L_aw	F_aw	A_aw	GD	PTS
Chelsea	Chelsea	1	20	14	4	2	44	19	9	0	0	22	3	5	4	2	22	16	25	46
Manchester City	Manchester City	2	20	14	4	2	44	19	7	2	1	20	9	7	2	1	24	10	25	46
Manchester United	Manchester United	3	20	10	7	3	34	20	8	1	1	22	7	2	6	2	12	13	14	37
Southampton	Southampton	4	20	11	3	6	34	15	7	2	2	24	7	4	1	4	10	8	19	36
Tottenham Hotspur	Tottenham Hotspur	5	20	10	4	6	29	27	5	2	4	16	13	5	2	2	13	14	2	34
Arsenal	Arsenal	6	20	9	6	5	34	25	5	3	1	18	10	4	3	4	16	15	9	33
West Ham United	West Ham United	7	20	9	5	6	31	24	6	2	3	16	10	3	3	3	15	14	7	32
Liverpool	Liverpool	8	20	8	5	7	28	27	4	5	2	15	11	4	0	5	13	16	1	29
Swansea City	Swansea City	9	20	8	5	7	25	24	6	2	2	15	7	2	3	5	10	17	1	29
Newcastle United	Newcastle United	10	20	7	6	7	25	31	5	3	2	16	14	2	3	5	9	17	-6	27
Stoke City	Stoke City	11	20	7	5	8	22	24	4	2	4	12	12	3	3	4	10	12	-2	26
Aston Villa	Aston Villa	12	20	5	7	8	11	22	2	5	3	7	11	3	2	5	4	11	-11	22
Everton	Everton	13	20	5	6	9	29	33	3	3	3	16	15	2	3	6	13	18	-4	21
Sunderland	Sunderland	14	20	3	11	6	18	30	1	6	3	10	15	2	5	3	8	15	-12	20
Hull City	Hull City	15	20	4	7	9	20	26	2	3	5	10	12	2	4	4	10	14	-6	19
Queens Park Rangers	Queens Park Rangers	16	20	5	4	11	22	35	5	4	2	18	13	0	0	9	4	22	-13	19
West Bromwich Albion	West Bromwich Albion	17	20	4	6	10	19	29	2	3	5	13	16	2	3	5	6	13	-10	18
Crystal Palace	Crystal Palace	18	20	3	8	9	20	30	2	2	5	10	14	1	6	4	10	16	-10	17
Burnley	Burnley	19	20	3	8	9	17	32	2	4	4	7	12	1	4	5	10	20	-15	17
Leicester City	Leicester City	20	20	3	5	12	19	33	1	4	4	12	15	2	1	8	7	18	-14	14

	name	team	goals
Diego Costa	Diego Costa	Chelsea	17
Sergio Agüero	Sergio Agüero	Manchester City	14
Charlie Austin	Charlie Austin	Queens Park Rangers	13
Alexis Sánchez	Alexis Sánchez	Arsenal	12
Papiss Demba Cisse	Papiss Demba Cisse	Newcastle United	9

	name	team	assists
Cesc Fàbregas	Cesc Fàbregas	Chelsea	15
Gylfi Sigurdsson	Gylfi Sigurdsson	Swansea City	8
Leighton Baines	Leighton Baines	Everton	8
Stewart Downing	Stewart Downing	West Ham United	7
Dusan Tadic	Dusan Tadic	Southampton	7

	name	injury	returns
Osman, L	Osman, L	Calf/Shin Injury	no date
Hibbert, T	Hibbert, T	Muscular Injury	2 Weeks
Moses, V	Moses, V	Thigh Muscle Strain	2 Weeks
Taarabt, A	Taarabt, A	Groin/Pelvis Injury	no date
Ward, S	Ward, S	Ankle/Foot Injury	no date

	home	away
0	Norwich	Manchester City
1	Manchester Utd.	Crystal Palace
2	Bournemouth	Swansea
3	Stoke City	Southampton FC
4	West Ham	Watford
5	Sunderland	Everton
6	Arsenal FC	West Brom
7	Aston Villa	Spurs
8	Liverpool	Chelsea FC
9	Leicester City	Newcastle

	name	team	salary	pts/salary	week_pts	total_pts
Januzaj, A	Januzaj, A	Manchester United	3.9	3.8	1	15
Grealish, J	Grealish, J	Aston Villa	2.5	4.0	3	10
Anichebe, V	Anichebe, V	West Bromwich Albion	4.0	8.2	10	33
Hibbert, T	Hibbert, T	Everton	2.2	3.2	0	7
Coutinho, P	Coutinho, P	Liverpool	4.4	11.1	2	49

	position-last-6-games	team
West Brom	15	West Brom
Hull	12	Hull
Southampton	9	Southampton
Newcastle	11	Newcastle
Crystal Palace	18	Crystal Palace

	Arsenal	Aston Villa	Burnley	Chelsea	Crystal Palace	Everton	Hull City	Leicester City	Liverpool	Manchester City	Manchester United	Newcastle United	Queens Park Rangers	Southampton	Stoke City	Sunderland	Swansea City	Tottenham Hotspur	West Bromwich Albion	West Ham United
6	Flamini	Sánchez	Jones	Matic	McArthur	Barry	Meyler	Mahrez	Lucas	Fernandinho	Carrick	Tioté	Henry	Wanyama	Whelan	Johnson	Ki	Bentaleb	Gardner	Nolan
7	Coquelin	Cleverley	Marney	Willian	Joe Ledley	Barkley	Livermore	Drinkwater	Moreno	Y Touré	Rooney	Ayoze Pérez	Barton	Davis	Mame Biram Diouf	Larsson	Dyer	Lamela	Morrison	Noble
8	Oxlade-Chamberlain	N'Zogbia	Boyd	Oscar	Bolasie	Naismith	Brady	James	Coutinho	Nasri	Young	Sissoko	Fer	Mane	Walters	Gómez	Sigurdsson	Eriksen	Brunt	Downing
9	Sánchez	Benteke	Barnes	Hazard	Zaha	Mirallas	Aluko	Vardy	Sterling	Silva	Falcao	Gouffran	Zamora	Tadic	Arnautovic	Wickham	Routledge	Chadli	Dorrans	Sakho
10	Cazorla	Agbonlahor	Ings	Diego Costa	Campbell	Lukaku	Jelavic	Ulloa	Lallana	Jovetic	van Persie	Armstrong	Austin	Pellè	Crouch	Fletcher	Bony	Kane	Berahino	Carroll