In [1]:
from db import DB
db = DB(filename="./baseball-archive-2012.sqlite", dbtype="sqlite")
db
Indexing schema. This will take a second...finished!
Out[1]:
DB[sqlite][localhost]:5432 > None@None
In [2]:
db.tables.allstarfull
Out[2]:
Column
Type
playerID
TEXT
yearID
INTEGER
gameNum
INTEGER
gameID
TEXT
teamID
TEXT
lgID
TEXT
GP
INTEGER
startingPos
INTEGER
In [3]:
db.tables.allstarfull.head()
Out[3]:
playerID
yearID
gameNum
gameID
teamID
lgID
GP
startingPos
0
aaronha01
1955
0
NLS195507120
ML1
NL
1
NaN
1
aaronha01
1956
0
ALS195607100
ML1
NL
1
NaN
2
aaronha01
1957
0
NLS195707090
ML1
NL
1
9
3
aaronha01
1958
0
ALS195807080
ML1
NL
1
9
4
aaronha01
1959
1
NLS195907070
ML1
NL
1
9
5
aaronha01
1959
2
NLS195908030
ML1
NL
1
9
In [4]:
db.tables.allstarfull.sample()
Out[4]:
playerID
yearID
gameNum
gameID
teamID
lgID
GP
startingPos
0
kellech01
1941
0
ALS194107080
NYA
AL
1
NaN
1
burdele01
1959
1
NLS195907070
ML1
NL
1
NaN
2
vaughar01
1936
0
NLS193607070
PIT
NL
0
NaN
3
benchjo01
1969
0
ALS196907230
CIN
NL
1
2
4
reesepe01
1951
0
ALS195107100
BRO
NL
1
NaN
5
glavito02
2002
0
NLS200207090
ATL
NL
0
NaN
6
dimagjo01
1939
0
ALS193907110
NYA
AL
1
8
7
harrebu01
1971
0
ALS197107130
NYN
NL
1
6
8
truckvi01
1949
0
NLS194907120
DET
AL
1
NaN
9
patekfr01
1976
0
NLS197607130
KCA
AL
1
NaN
In [5]:
db.tables.allstarfull.playerID
Out[5]:
Table
Name
Type
allstarfull
playerID
TEXT
In [6]:
db.tables.allstarfull.playerID.head()
Out[6]:
0 aaronha01
1 aaronha01
2 aaronha01
3 aaronha01
4 aaronha01
5 aaronha01
Name: playerID, dtype: object
In [7]:
db.tables.allstarfull.playerID.unique().count()
Out[7]:
1637
In [8]:
db.find_column("*player*")
Out[8]:
Table
Column Name
Type
allstarfull
playerID
TEXT
appearances
playerID
TEXT
awardsplayers
playerID
TEXT
awardsshareplayers
playerID
TEXT
battingpost
playerID
TEXT
fielding
playerID
TEXT
fieldingof
playerID
TEXT
fieldingpost
playerID
TEXT
master
playerID
TEXT
pitching
playerID
TEXT
pitchingpost
playerID
TEXT
salaries
playerID
TEXT
schoolsplayers
playerID
TEXT
tmp_batting
playerID
TEXT
In [9]:
db.find_column("*ID*")
Out[9]:
Table
Column Name
Type
allstarfull
lgID
TEXT
allstarfull
gameID
TEXT
allstarfull
playerID
TEXT
allstarfull
teamID
TEXT
allstarfull
yearID
INTEGER
appearances
playerID
TEXT
appearances
teamID
TEXT
appearances
yearID
INTEGER
appearances
lgID
TEXT
awardsmanagers
lgID
TEXT
awardsmanagers
managerID
TEXT
awardsmanagers
yearID
INTEGER
awardsmanagers
awardID
TEXT
awardsplayers
lgID
TEXT
awardsplayers
playerID
TEXT
awardsplayers
yearID
INTEGER
awardsplayers
awardID
TEXT
awardssharemanagers
lgID
TEXT
awardssharemanagers
managerID
TEXT
awardssharemanagers
yearID
INTEGER
awardssharemanagers
awardID
TEXT
awardsshareplayers
lgID
TEXT
awardsshareplayers
playerID
TEXT
awardsshareplayers
yearID
INTEGER
awardsshareplayers
awardID
TEXT
battingpost
playerID
TEXT
battingpost
teamID
TEXT
battingpost
yearID
INTEGER
battingpost
GIDP
INTEGER
battingpost
lgID
TEXT
fielding
playerID
TEXT
fielding
teamID
TEXT
fielding
yearID
INTEGER
fielding
lgID
TEXT
fieldingof
playerID
TEXT
fieldingof
yearID
INTEGER
fieldingpost
lgID
TEXT
fieldingpost
playerID
TEXT
fieldingpost
teamID
TEXT
fieldingpost
yearID
INTEGER
halloffame
hofID
TEXT
hofold
hofID
TEXT
managers
lgID
TEXT
managers
teamID
TEXT
managers
managerID
TEXT
managers
yearID
INTEGER
managershalf
lgID
TEXT
managershalf
teamID
TEXT
managershalf
managerID
TEXT
managershalf
yearID
INTEGER
master
lahman40ID
TEXT
master
bbrefID
TEXT
master
retroID
TEXT
master
lahmanID
INTEGER
master
playerID
TEXT
master
hofID
TEXT
master
holtzID
TEXT
master
managerID
TEXT
master
lahman45ID
TEXT
pitching
playerID
TEXT
pitching
teamID
TEXT
pitching
yearID
INTEGER
pitching
GIDP
INTEGER
pitching
lgID
TEXT
pitchingpost
playerID
TEXT
pitchingpost
teamID
TEXT
pitchingpost
yearID
INTEGER
pitchingpost
GIDP
INTEGER
pitchingpost
lgID
TEXT
salaries
lgID
TEXT
salaries
playerID
TEXT
salaries
teamID
TEXT
salaries
yearID
INTEGER
schools
schoolID
TEXT
schoolsplayers
schoolID
TEXT
schoolsplayers
playerID
TEXT
seriespost
lgIDwinner
TEXT
seriespost
lgIDloser
TEXT
seriespost
teamIDwinner
TEXT
seriespost
yearID
INTEGER
seriespost
teamIDloser
TEXT
teams
teamID
TEXT
teams
teamIDBR
TEXT
teams
teamIDretro
TEXT
teams
yearID
INTEGER
teams
teamIDlahman45
TEXT
teams
lgID
TEXT
teams
franchID
TEXT
teams
divID
TEXT
teamsfranchises
franchID
TEXT
teamshalf
lgID
TEXT
teamshalf
divID
TEXT
teamshalf
teamID
TEXT
teamshalf
yearID
INTEGER
tmp_batting
playerID
TEXT
tmp_batting
teamID
TEXT
tmp_batting
yearID
INTEGER
tmp_batting
GIDP
INTEGER
tmp_batting
lgID
TEXT
In [10]:
db.find_column("HR")
Out[10]:
Table
Column Name
Type
battingpost
HR
INTEGER
pitching
HR
INTEGER
pitchingpost
HR
INTEGER
teams
HR
INTEGER
tmp_batting
HR
INTEGER
In [11]:
db.find_column("HR", "INTEGER")
Out[11]:
Table
Column Name
Type
battingpost
HR
INTEGER
pitching
HR
INTEGER
pitchingpost
HR
INTEGER
teams
HR
INTEGER
tmp_batting
HR
INTEGER
In [12]:
db.find_column("*", "INTEGER")
Out[12]:
Table
Column Name
Type
allstarfull
GP
INTEGER
allstarfull
yearID
INTEGER
allstarfull
startingPos
INTEGER
allstarfull
gameNum
INTEGER
appearances
G_cf
INTEGER
appearances
G_all
INTEGER
appearances
G_2b
INTEGER
appearances
G_batting
INTEGER
appearances
G_p
INTEGER
appearances
G_ss
INTEGER
appearances
G_of
INTEGER
appearances
G_c
INTEGER
appearances
G_dh
INTEGER
appearances
yearID
INTEGER
appearances
G_3b
INTEGER
appearances
G_rf
INTEGER
appearances
G_1b
INTEGER
appearances
G_lf
INTEGER
appearances
G_defense
INTEGER
appearances
G_ph
INTEGER
appearances
G_pr
INTEGER
awardsmanagers
yearID
INTEGER
awardsplayers
yearID
INTEGER
awardssharemanagers
votesFirst
INTEGER
awardssharemanagers
pointsWon
INTEGER
awardssharemanagers
yearID
INTEGER
awardssharemanagers
pointsMax
INTEGER
awardsshareplayers
yearID
INTEGER
awardsshareplayers
pointsMax
INTEGER
battingpost
RBI
INTEGER
battingpost
BB
INTEGER
battingpost
HR
INTEGER
battingpost
IBB
INTEGER
battingpost
3B
INTEGER
battingpost
HBP
INTEGER
battingpost
AB
INTEGER
battingpost
G
INTEGER
battingpost
H
INTEGER
battingpost
yearID
INTEGER
battingpost
R
INTEGER
battingpost
2B
INTEGER
battingpost
CS
INTEGER
battingpost
GIDP
INTEGER
battingpost
SF
INTEGER
battingpost
SH
INTEGER
battingpost
SO
INTEGER
battingpost
SB
INTEGER
fielding
WP
INTEGER
fielding
E
INTEGER
fielding
stint
INTEGER
fielding
DP
INTEGER
fielding
PB
INTEGER
fielding
PO
INTEGER
fielding
A
INTEGER
fielding
GS
INTEGER
fielding
G
INTEGER
fielding
yearID
INTEGER
fielding
CS
INTEGER
fielding
InnOuts
INTEGER
fielding
SB
INTEGER
fieldingof
Gcf
INTEGER
fieldingof
Glf
INTEGER
fieldingof
Grf
INTEGER
fieldingof
yearID
INTEGER
fieldingof
stint
INTEGER
fieldingpost
A
INTEGER
fieldingpost
G
INTEGER
fieldingpost
TP
INTEGER
fieldingpost
PB
INTEGER
fieldingpost
InnOuts
INTEGER
fieldingpost
yearID
INTEGER
fieldingpost
PO
INTEGER
fieldingpost
SB
INTEGER
fieldingpost
CS
INTEGER
fieldingpost
GS
INTEGER
fieldingpost
E
INTEGER
fieldingpost
DP
INTEGER
halloffame
votes
INTEGER
halloffame
needed
INTEGER
halloffame
yearid
INTEGER
halloffame
ballots
INTEGER
hofold
votes
INTEGER
hofold
yearid
INTEGER
hofold
ballots
INTEGER
managers
G
INTEGER
managers
rank
INTEGER
managers
W
INTEGER
managers
yearID
INTEGER
managers
inseason
INTEGER
managers
L
INTEGER
managershalf
G
INTEGER
managershalf
rank
INTEGER
managershalf
W
INTEGER
managershalf
yearID
INTEGER
managershalf
inseason
INTEGER
managershalf
half
INTEGER
managershalf
L
INTEGER
master
weight
INTEGER
master
lahmanID
INTEGER
master
birthMonth
INTEGER
master
deathMonth
INTEGER
master
deathYear
INTEGER
master
birthYear
INTEGER
master
birthDay
INTEGER
master
deathDay
INTEGER
pitching
BB
INTEGER
pitching
HR
INTEGER
pitching
IBB
INTEGER
pitching
IPouts
INTEGER
pitching
BK
INTEGER
pitching
WP
INTEGER
pitching
stint
INTEGER
pitching
HBP
INTEGER
pitching
SH
INTEGER
pitching
ER
INTEGER
pitching
GS
INTEGER
pitching
G
INTEGER
pitching
H
INTEGER
pitching
CG
INTEGER
pitching
L
INTEGER
pitching
GF
INTEGER
pitching
yearID
INTEGER
pitching
BFP
INTEGER
pitching
W
INTEGER
pitching
GIDP
INTEGER
pitching
SHO
INTEGER
pitching
SV
INTEGER
pitching
R
INTEGER
pitching
SO
INTEGER
pitching
SF
INTEGER
pitchingpost
BB
INTEGER
pitchingpost
HR
INTEGER
pitchingpost
IBB
INTEGER
pitchingpost
IPouts
INTEGER
pitchingpost
BK
INTEGER
pitchingpost
WP
INTEGER
pitchingpost
HBP
INTEGER
pitchingpost
SH
INTEGER
pitchingpost
ER
INTEGER
pitchingpost
GS
INTEGER
pitchingpost
G
INTEGER
pitchingpost
H
INTEGER
pitchingpost
CG
INTEGER
pitchingpost
L
INTEGER
pitchingpost
GF
INTEGER
pitchingpost
yearID
INTEGER
pitchingpost
BFP
INTEGER
pitchingpost
W
INTEGER
pitchingpost
GIDP
INTEGER
pitchingpost
SHO
INTEGER
pitchingpost
SV
INTEGER
pitchingpost
SF
INTEGER
pitchingpost
R
INTEGER
pitchingpost
SO
INTEGER
salaries
yearID
INTEGER
schoolsplayers
yearMax
INTEGER
schoolsplayers
yearMin
INTEGER
seriespost
wins
INTEGER
seriespost
yearID
INTEGER
seriespost
losses
INTEGER
seriespost
ties
INTEGER
teams
W
INTEGER
teams
BB
INTEGER
teams
BPF
INTEGER
teams
HR
INTEGER
teams
IPouts
INTEGER
teams
Ghome
INTEGER
teams
3B
INTEGER
teams
HA
INTEGER
teams
HBP
INTEGER
teams
DP
INTEGER
teams
SOA
INTEGER
teams
attendance
INTEGER
teams
PPF
INTEGER
teams
RA
INTEGER
teams
SHO
INTEGER
teams
AB
INTEGER
teams
E
INTEGER
teams
G
INTEGER
teams
H
INTEGER
teams
CG
INTEGER
teams
L
INTEGER
teams
BBA
INTEGER
teams
yearID
INTEGER
teams
R
INTEGER
teams
2B
INTEGER
teams
CS
INTEGER
teams
HRA
INTEGER
teams
ER
INTEGER
teams
SV
INTEGER
teams
Rank
INTEGER
teams
SO
INTEGER
teams
SB
INTEGER
teams
SF
INTEGER
teamshalf
G
INTEGER
teamshalf
Rank
INTEGER
teamshalf
L
INTEGER
teamshalf
yearID
INTEGER
teamshalf
W
INTEGER
tmp_batting
RBI
INTEGER
tmp_batting
BB
INTEGER
tmp_batting
HR
INTEGER
tmp_batting
IBB
INTEGER
tmp_batting
3B
INTEGER
tmp_batting
G_old
INTEGER
tmp_batting
stint
INTEGER
tmp_batting
G_batting
INTEGER
tmp_batting
HBP
INTEGER
tmp_batting
AB
INTEGER
tmp_batting
G
INTEGER
tmp_batting
H
INTEGER
tmp_batting
yearID
INTEGER
tmp_batting
R
INTEGER
tmp_batting
2B
INTEGER
tmp_batting
CS
INTEGER
tmp_batting
GIDP
INTEGER
tmp_batting
SH
INTEGER
tmp_batting
SO
INTEGER
tmp_batting
SB
INTEGER
tmp_batting
SF
INTEGER
In [13]:
db.find_table("*")
Out[13]:
Table
Columns
allstarfull
playerID, yearID, gameNum, gameID, teamID, lgID, GP, startingPos
appearances
yearID, teamID, lgID, playerID, G_all, G_batting, G_defense, G_p, G_c, G_1b, G_2
b, G_3b, G_ss, G_lf, G_cf, G_rf, G_of, G_dh, G_ph, G_pr
awardsmanagers
managerID, awardID, yearID, lgID, tie, notes
awardsplayers
playerID, awardID, yearID, lgID, tie, notes
awardssharemanagers
awardID, yearID, lgID, managerID, pointsWon, pointsMax, votesFirst
awardsshareplayers
awardID, yearID, lgID, playerID, pointsWon, pointsMax, votesFirst
battingpost
yearID, round, playerID, teamID, lgID, G, AB, R, H, 2B, 3B, HR, RBI, SB, CS, BB,
SO, IBB, HBP, SH, SF, GIDP
fielding
playerID, yearID, stint, teamID, lgID, POS, G, GS, InnOuts, PO, A, E, DP, PB, WP
, SB, CS, ZR
fieldingof
playerID, yearID, stint, Glf, Gcf, Grf
fieldingpost
playerID, yearID, teamID, lgID, round, POS, G, GS, InnOuts, PO, A, E, DP, TP, PB
, SB, CS
halloffame
hofID, yearid, votedBy, ballots, needed, votes, inducted, category
hofold
hofID, yearid, votedBy, ballots, votes, inducted, category
managers
managerID, yearID, teamID, lgID, inseason, G, W, L, rank, plyrMgr
managershalf
managerID, yearID, teamID, lgID, inseason, half, G, W, L, rank
master
lahmanID, playerID, managerID, hofID, birthYear, birthMonth, birthDay, birthCoun
try, birthState, birthCity, deathYear, deathMonth, deathDay, deathCountry, death
State, deathCity, nameFirst, nameLast, nameNote, nameGiven, nameNick, weight, he
ight, bats, throws, debut, finalGame, college, lahman40ID, lahman45ID, retroID,
holtzID, bbrefID
pitching
playerID, yearID, stint, teamID, lgID, W, L, G, GS, CG, SHO, SV, IPouts, H, ER,
HR, BB, SO, BAOpp, ERA, IBB, WP, HBP, BK, BFP, GF, R, SH, SF, GIDP
pitchingpost
playerID, yearID, round, teamID, lgID, W, L, G, GS, CG, SHO, SV, IPouts, H, ER,
HR, BB, SO, BAOpp, ERA, IBB, WP, HBP, BK, BFP, GF, R, SH, SF, GIDP
salaries
yearID, teamID, lgID, playerID, salary
schools
schoolID, schoolName, schoolCity, schoolState, schoolNick
schoolsplayers
playerID, schoolID, yearMin, yearMax
seriespost
yearID, round, teamIDwinner, lgIDwinner, teamIDloser, lgIDloser, wins, losses, t
ies
teams
yearID, lgID, teamID, franchID, divID, Rank, G, Ghome, W, L, DivWin, WCWin, LgWi
n, WSWin, R, AB, H, 2B, 3B, HR, BB, SO, SB, CS, HBP, SF, RA, ER, ERA, CG, SHO, S
V, IPouts, HA, HRA, BBA, SOA, E, DP, FP, name, park, attendance, BPF, PPF, teamI
DBR, teamIDlahman45, teamIDretro
teamsfranchises
franchID, franchName, active, NAassoc
teamshalf
yearID, lgID, teamID, Half, divID, DivWin, Rank, G, W, L
tmp_batting
playerID, yearID, stint, teamID, lgID, G, G_batting, AB, R, H, 2B, 3B, HR, RBI,
SB, CS, BB, SO, IBB, HBP, SH, SF, GIDP, G_old
In [14]:
db.find_table("*batting*")
Out[14]:
Table
Columns
battingpost
yearID, round, playerID, teamID, lgID, G, AB, R, H, 2B, 3B, HR, RBI, SB, CS, BB,
SO, IBB, HBP, SH, SF, GIDP
tmp_batting
playerID, yearID, stint, teamID, lgID, G, G_batting, AB, R, H, 2B, 3B, HR, RBI,
SB, CS, BB, SO, IBB, HBP, SH, SF, GIDP, G_old
In [15]:
db.save_credentials("baseball")
In [16]:
from db import DB
In [17]:
db = DB(profile="baseball", dbtype="sqlite")
Indexing schema. This will take a second...finished!
In [18]:
db.tables
Out[18]:
Table
Columns
allstarfull
playerID, yearID, gameNum, gameID, teamID, lgID, GP, startingPos
appearances
yearID, teamID, lgID, playerID, G_all, G_batting, G_defense, G_p, G_c, G_1b, G_2
b, G_3b, G_ss, G_lf, G_cf, G_rf, G_of, G_dh, G_ph, G_pr
awardsmanagers
managerID, awardID, yearID, lgID, tie, notes
awardsplayers
playerID, awardID, yearID, lgID, tie, notes
awardssharemanagers
awardID, yearID, lgID, managerID, pointsWon, pointsMax, votesFirst
awardsshareplayers
awardID, yearID, lgID, playerID, pointsWon, pointsMax, votesFirst
battingpost
yearID, round, playerID, teamID, lgID, G, AB, R, H, 2B, 3B, HR, RBI, SB, CS, BB,
SO, IBB, HBP, SH, SF, GIDP
fielding
playerID, yearID, stint, teamID, lgID, POS, G, GS, InnOuts, PO, A, E, DP, PB, WP
, SB, CS, ZR
fieldingof
playerID, yearID, stint, Glf, Gcf, Grf
fieldingpost
playerID, yearID, teamID, lgID, round, POS, G, GS, InnOuts, PO, A, E, DP, TP, PB
, SB, CS
halloffame
hofID, yearid, votedBy, ballots, needed, votes, inducted, category
hofold
hofID, yearid, votedBy, ballots, votes, inducted, category
managers
managerID, yearID, teamID, lgID, inseason, G, W, L, rank, plyrMgr
managershalf
managerID, yearID, teamID, lgID, inseason, half, G, W, L, rank
master
lahmanID, playerID, managerID, hofID, birthYear, birthMonth, birthDay, birthCoun
try, birthState, birthCity, deathYear, deathMonth, deathDay, deathCountry, death
State, deathCity, nameFirst, nameLast, nameNote, nameGiven, nameNick, weight, he
ight, bats, throws, debut, finalGame, college, lahman40ID, lahman45ID, retroID,
holtzID, bbrefID
pitching
playerID, yearID, stint, teamID, lgID, W, L, G, GS, CG, SHO, SV, IPouts, H, ER,
HR, BB, SO, BAOpp, ERA, IBB, WP, HBP, BK, BFP, GF, R, SH, SF, GIDP
pitchingpost
playerID, yearID, round, teamID, lgID, W, L, G, GS, CG, SHO, SV, IPouts, H, ER,
HR, BB, SO, BAOpp, ERA, IBB, WP, HBP, BK, BFP, GF, R, SH, SF, GIDP
salaries
yearID, teamID, lgID, playerID, salary
schools
schoolID, schoolName, schoolCity, schoolState, schoolNick
schoolsplayers
playerID, schoolID, yearMin, yearMax
seriespost
yearID, round, teamIDwinner, lgIDwinner, teamIDloser, lgIDloser, wins, losses, t
ies
teams
yearID, lgID, teamID, franchID, divID, Rank, G, Ghome, W, L, DivWin, WCWin, LgWi
n, WSWin, R, AB, H, 2B, 3B, HR, BB, SO, SB, CS, HBP, SF, RA, ER, ERA, CG, SHO, S
V, IPouts, HA, HRA, BBA, SOA, E, DP, FP, name, park, attendance, BPF, PPF, teamI
DBR, teamIDlahman45, teamIDretro
teamsfranchises
franchID, franchName, active, NAassoc
teamshalf
yearID, lgID, teamID, Half, divID, DivWin, Rank, G, W, L
tmp_batting
playerID, yearID, stint, teamID, lgID, G, G_batting, AB, R, H, 2B, 3B, HR, RBI,
SB, CS, BB, SO, IBB, HBP, SH, SF, GIDP, G_old
In [19]:
db.tables.appearances.head()
Out[19]:
yearID
teamID
lgID
playerID
G_all
G_batting
G_defense
G_p
G_c
G_1b
G_2b
G_3b
G_ss
G_lf
G_cf
G_rf
G_of
G_dh
G_ph
G_pr
0
1871
BS1
NA
barnero01
31
None
31
31
0
0
0
16
0
15
0
0
0
0
None
None
1
1871
BS1
NA
barrofr01
18
None
18
18
0
0
0
1
0
0
13
0
4
17
None
None
2
1871
BS1
NA
birdsda01
29
None
29
29
0
7
0
0
0
0
0
0
27
27
None
None
3
1871
BS1
NA
conefr01
19
None
19
19
0
0
0
0
0
0
18
0
1
18
None
None
4
1871
BS1
NA
gouldch01
31
None
31
31
0
0
30
0
0
0
0
0
1
1
None
None
5
1871
BS1
NA
jackssa01
16
None
16
16
0
0
0
14
0
1
0
1
0
1
None
None
In [19]:
Content source: Cophy08/db.py
Similar notebooks: