Define all the libraries which needs to be set for operations here
In [1]:
library("XML")
library("methods")
library("dplyr")
library("tidyr")
Define the database name
In [2]:
dbName = toString("database.sqlite")
Connect to the database
In [3]:
my_db <- src_sqlite(dbName, create = FALSE)
Query the Player table
In [4]:
playerTable = tbl(my_db, sql("SELECT player_api_id,player_name FROM Player"))
Query the Player Attributes table
In [5]:
playerAttrTable = tbl(my_db, sql("SELECT player_api_id,overall_rating,gk_diving,gk_handling,gk_kicking,gk_positioning,gk_reflexes FROM Player_Attributes"))
Add the gk_rating field to the table using mutate and gk fields
In [6]:
playerAttrTable = mutate(playerAttrTable, gk_rating = (gk_diving + gk_handling + gk_kicking + gk_positioning + gk_reflexes))
Group all the players by their id and find the different max ratings
In [7]:
playerAttrTable = playerAttrTable %>%
group_by(player_api_id) %>%
summarize(overall_rating = max(overall_rating), gk_rating = max(gk_rating))
Print the table content
In [8]:
playerAttrTable
Left Join the tables by player id
In [9]:
playerData = left_join(playerTable, playerAttrTable, by = "player_api_id")
Print the data after join
In [10]:
playerData
Query the Match table
In [11]:
matchTable = tbl(my_db, sql("SELECT league_id,home_team_api_id,away_team_api_id,home_team_goal,away_team_goal,home_player_1,home_player_2,home_player_3,home_player_4,home_player_5,home_player_6,home_player_7,home_player_8,home_player_9,home_player_10,home_player_11,away_player_1,away_player_2,away_player_3,away_player_4,away_player_5,away_player_6,away_player_7,away_player_8,away_player_9,away_player_10,away_player_11 FROM Match"))
Group the Match table based on the player id
In [12]:
playerMatchDataTable = gather(tbl_df(matchTable), players, player_api_id, home_player_1 : away_player_11)
Group the players based on their id and summarize the attributes
In [13]:
playerMatchesDF = playerMatchDataTable %>%
group_by(player_api_id) %>%
summarize(appearances = n(),league_appearances = n_distinct(league_id))
Print the data
In [14]:
playerMatchesDF
Join the two tables now based on the player id
In [15]:
playersFinalData = left_join(tbl_df(playerData), playerMatchesDF, by = "player_api_id")
Print the data
In [16]:
head(playersFinalData,n=10)
Query the Team table
In [17]:
teamTable = tbl(my_db, sql("SELECT team_api_id,team_long_name FROM Team"))
Query the Team attribute table
In [18]:
teamAttrTable = tbl(my_db, sql("SELECT team_api_id,buildUpPlaySpeed,buildUpPlayDribbling,buildUpPlayPassing,chanceCreationPassing,chanceCreationCrossing,chanceCreationShooting,defencePressure,defenceAggression,defenceTeamWidth FROM Team_Attributes"))
Add the extra fields using different tables
In [19]:
teamAttrTable = mutate(teamAttrTable, chance_creation = (chanceCreationPassing + chanceCreationCrossing + chanceCreationShooting), build_up_play =(buildUpPlaySpeed +buildUpPlayDribbling+buildUpPlayPassing), defense = (defencePressure+defenceAggression+defenceTeamWidth))
Group the team table based on the team id and max Values
In [20]:
teamAttrTable = teamAttrTable %>%
group_by(team_api_id) %>%
summarize(chance_creation = max(chance_creation), build_up_play = max(build_up_play), defense = max(defense))
Print the data
In [21]:
teamAttrTable
Join the two tables into one
In [22]:
teamTableData = left_join(teamTable, teamAttrTable, by = "team_api_id")
Print the data
In [23]:
teamTableData
Group the teams based on the two given ids
In [24]:
teamMatchDataTable = gather(tbl_df(matchTable), team, team_api_id, home_team_api_id,away_team_api_id)
Find the gilas scored by the home team
In [25]:
homeTeamGoals = gather(tbl_df(matchTable), team, team_api_id, home_team_api_id) %>%
group_by(team_api_id) %>%
summarize(homeGoals = sum(home_team_goal))
Print the data
In [26]:
head(homeTeamGoals,n =10)
Group the teams based on the id's for away teams
In [27]:
awayTeamGoals = gather(tbl_df(matchTable), team, team_api_id, away_team_api_id) %>%
group_by(team_api_id) %>%
summarize(awayGoals = sum(away_team_goal))
Join the teams based on the goals when playing as away and home team
In [28]:
teamGoalsData = full_join(awayTeamGoals,homeTeamGoals) %>% mutate(goals = (awayGoals+homeGoals))
Print the data
In [29]:
teamGoalsData
Join the tables to make a full team table
In [30]:
teamFinalData = left_join(tbl_df(teamTableData),teamGoalsData,by = "team_api_id")
Print the data
In [31]:
head(teamFinalData,n=10)
Write the player data to a csv file
In [32]:
write.csv(playersFinalData, file = "player.csv",row.names=FALSE)
Write the team data to a csv file
In [33]:
write.csv(teamFinalData, file = "team.csv",row.names=FALSE)