Use this notebook to read in sqlite database and export csv with basic features


In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sqlite3

Load SQLite tables


In [3]:
with sqlite3.connect('/Users/mtetkosk/Google Drive/Fantasy Sports/Soccer/database.sqlite') as con:
    countries = pd.read_sql_query("SELECT * from Country", con)
    players = pd.read_sql_query("SELECT * FROM Player",con)
    player_attributes = pd.read_sql_query("SELECT * FROM Player_Attributes",con)
    team_attributes = pd.read_sql_query("SELECT * FROM Team_Attributes",con)
    matches = pd.read_sql_query("SELECT * from Match", con)
    leagues = pd.read_sql_query("SELECT * from League", con)
    teams = pd.read_sql_query("SELECT * from Team", con)

Goal of Analysis - Predict total number of goals in English Premier League matches


In [5]:
#Filter teams and matches in English Premier League
epl_matches = matches[matches.league_id == 1729]
epl_team_ids = pd.DataFrame({'ID':(epl_matches['home_team_api_id'].unique())})
epl_teams = pd.merge(teams,epl_team_ids,how = 'inner', left_on = 'team_api_id', right_on = 'ID' )
epl_team_attributes = pd.merge(team_attributes, epl_team_ids, how = 'inner', left_on = 'team_api_id', right_on = 'ID')

In [6]:
# Write csv files
epl_matches.to_csv('/Users/mtetkosk/Google Drive/Data Science Projects/data/processed/EPL_matches.csv')
epl_teams.to_csv('/Users/mtetkosk/Google Drive/Data Science Projects/data/processed/EPL_teams.csv')
player_attributes.to_csv('/Users/mtetkosk/Google Drive/Data Science Projects/data/processed/Player_Attributes.csv')
players.to_csv('/Users/mtetkosk/Google Drive/Data Science Projects/data/processed/Players.csv')
epl_team_attributes.to_csv('/Users/mtetkosk/Google Drive/Data Science Projects/data/processed/EPL_team_attributes.csv')

In [ ]: