Lotto Number Predictions

This notebook performs a download of the most recent winning numbers from the Lotto649 (a lottery managed by the British Columbia Lotto Corporation in BC, Canada), and performs basic number analysis on them in order to describe the most winningest numbers in history.

Imports


In [14]:
import sys
import pandas as pd
import datetime as dt
import urllib.request
import zipfile

%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (20.0, 10.0)

Resources


In [2]:
# Main Getter Function

# Download 
def dl_file(url, data_path, file_name):
    print("### Downloading " + url + " and saving as " + data_path + file_name)
    urllib.request.urlretrieve(url, data_path + file_name)
    
    
# Unzip
def unzip(data_path, file_name):
  print("### Unzipping " + file_name + " at " + data_path)
  #extension = ".zip"

  zip_ref = zipfile.ZipFile(data_path + file_name, 'r')
  zip_ref.extractall(data_path)
  zip_ref.close()

# Variables 
# TODO - make abstract to handle other sets
url = "http://www.bclc.com/documents/DownloadableNumbers/CSV/649.zip"
data_path = "data/"
file_name = "649"

# download zip file and save
dl_file(url, data_path, file_name)

# unzip to csv
unzip(data_path, file_name)


### Downloading http://www.bclc.com/documents/DownloadableNumbers/CSV/649.zip and saving as data/649
### Unzipping 649 at data/

Conversion


In [3]:
base_reader = pd.read_csv(data_path + file_name + ".csv")
base_reader.set_index("DRAW NUMBER", inplace=True)
base_reader.tail()


Out[3]:
PRODUCT SEQUENCE NUMBER DRAW DATE NUMBER DRAWN 1 NUMBER DRAWN 2 NUMBER DRAWN 3 NUMBER DRAWN 4 NUMBER DRAWN 5 NUMBER DRAWN 6 BONUS NUMBER
DRAW NUMBER
3622 649 0 2018-10-06 20 23 24 33 37 48 22
3623 649 0 2018-10-10 3 15 16 20 22 23 24
3624 649 0 2018-10-13 25 27 31 41 44 45 16
3625 649 0 2018-10-17 10 32 37 45 46 47 6
3626 649 0 2018-10-20 4 12 16 25 30 31 8

In [ ]:


In [4]:
columns = list(base_reader.loc[:,'NUMBER DRAWN 1':'NUMBER DRAWN 6'])
draws = base_reader[columns]

draws.tail()


Out[4]:
NUMBER DRAWN 1 NUMBER DRAWN 2 NUMBER DRAWN 3 NUMBER DRAWN 4 NUMBER DRAWN 5 NUMBER DRAWN 6
DRAW NUMBER
3622 20 23 24 33 37 48
3623 3 15 16 20 22 23
3624 25 27 31 41 44 45
3625 10 32 37 45 46 47
3626 4 12 16 25 30 31

Analysis

Ball #1


In [5]:
# set
b1 = draws["NUMBER DRAWN 1"]

# describe
b1.describe()


Out[5]:
count    3700.000000
mean        7.321081
std         5.812018
min         1.000000
25%         3.000000
50%         6.000000
75%        10.000000
max        38.000000
Name: NUMBER DRAWN 1, dtype: float64

In [6]:
# unique
b1vc = b1.value_counts()
b1vc.columns = ["value"]
b1vc


Out[6]:
1     426
2     395
3     355
4     350
5     263
6     240
7     228
9     195
8     193
10    153
11    142
12    127
13    101
14     73
15     69
16     69
17     53
18     52
20     43
19     42
21     28
23     26
22     17
24     13
26     11
27     10
25      7
28      6
30      4
29      4
31      2
33      1
32      1
38      1
Name: NUMBER DRAWN 1, dtype: int64

In [7]:
b1vc.plot(kind='bar')


Out[7]:
<matplotlib.axes._subplots.AxesSubplot at 0x118fa1f98>

In [8]:
b1vc_vals = b1vc.index
b1vc_vals


Out[8]:
Int64Index([ 1,  2,  3,  4,  5,  6,  7,  9,  8, 10, 11, 12, 13, 14, 15, 16, 17,
            18, 20, 19, 21, 23, 22, 24, 26, 27, 25, 28, 30, 29, 31, 33, 32,
            38],
           dtype='int64')

In [11]:
fig1, ax1 = plt.subplots()

explode = (0.05, 0)

ax1.pie(b1vc, autopct='%1.0f%%', labels=b1vc_vals, shadow=True, startangle=90)
ax1.axis('equal')
plt.legend(title="Legend")
plt.show()


Validaton

It seems very odd to me that the most frequent numbers are the lowest numbers, so let's do a little bit of analysis in order to validate that i'm not doing something wrong...


In [16]:
# Filter Columns to Date and Ball #1
draw_dates_1_list = list(base_reader.loc[:,'DRAW DATE':'NUMBER DRAWN 1'])
draw_dates_1 = base_reader[draw_dates_1_list]
draw_dates_1.tail()


Out[16]:
DRAW DATE NUMBER DRAWN 1
DRAW NUMBER
3622 2018-10-06 20
3623 2018-10-10 3
3624 2018-10-13 25
3625 2018-10-17 10
3626 2018-10-20 4

In [19]:
# Filter to 2018 Draws
draw_dates_1_2018 = draw_dates_1[(draw_dates_1['DRAW DATE'] > '2018-0-0')]
draw_dates_1_2018.head()


Out[19]:
DRAW DATE NUMBER DRAWN 1
DRAW NUMBER
3543 2018-01-03 3
3544 2018-01-06 12
3545 2018-01-10 9
3546 2018-01-13 21
3547 2018-01-17 6

In [22]:
# Filter to display all #1 draws during 2018
draw_dates_ball_one_number_one = draw_dates_1_2018[(draw_dates_1_2018['NUMBER DRAWN 1'] == 1)]
draw_dates_ball_one_number_one


Out[22]:
DRAW DATE NUMBER DRAWN 1
DRAW NUMBER
3551 2018-01-31 1
3563 2018-03-14 1
3565 2018-03-21 1
3572 2018-04-14 1
3596 2018-07-07 1
3597 2018-07-11 1
3620 2018-09-29 1

In [37]:
# Let's see what percent of the draws in 2018 are Ball #1

In [38]:
percent_ball_one = len(draw_dates_1_2018.index) / len(draw_dates_ball_one_number_one.index)
percent_ball_one


Out[38]:
12.0

In [39]:
# This seems to match with our results from Cell 11... let's verify

In [53]:
# ball_one_number_one_list = list(base_reader.loc[:,'NUMBER DRAWN 1'])
# ball_one_number_one = base_reader[ball_one_number_one_list]
# ball_one_number_one

In [45]:
ball_one_number_one_draws = draws[(draws['NUMBER DRAWN 1'] == 1)]
ball_one_number_one_draws


percent_ball_one = len(draw_dates_1_2018.index) / len(ball_one_number_one_draws.index)
percent_ball_one


Out[45]:
NUMBER DRAWN 1 NUMBER DRAWN 2 NUMBER DRAWN 3 NUMBER DRAWN 4 NUMBER DRAWN 5 NUMBER DRAWN 6
DRAW NUMBER
3 1 6 23 24 27 39
45 1 3 19 31 32 47
62 1 2 8 23 46 48
65 1 6 13 15 22 27
68 1 6 15 34 42 44
69 1 15 20 22 24 49
73 1 5 7 8 11 13
75 1 2 7 13 22 31
79 1 14 22 25 40 42
81 1 2 13 21 42 48
83 1 7 14 31 41 43
85 1 5 14 24 25 38
100 1 5 6 18 31 35
102 1 4 14 17 21 26
103 1 3 40 43 45 46
113 1 10 18 27 35 38
125 1 16 25 36 39 42
142 1 2 7 9 20 43
152 1 2 5 7 40 44
159 1 7 15 20 31 45
177 1 26 33 34 38 46
181 1 3 8 18 35 47
183 1 6 11 27 33 41
199 1 7 19 36 42 49
201 1 7 11 14 19 38
207 1 10 13 19 25 33
215 1 12 15 19 22 30
221 1 7 17 28 41 45
242 1 25 34 41 44 49
252 1 8 10 34 42 45
... ... ... ... ... ... ...
3290 1 4 7 9 12 37
3303 1 2 9 12 21 39
3307 1 6 8 13 21 26
3308 1 3 15 27 31 36
3314 1 13 28 34 37 45
3323 1 2 3 4 29 48
3355 1 3 7 16 17 37
3356 1 10 11 12 19 27
3361 1 3 5 8 13 31
3365 1 2 7 9 18 27
3368 1 7 18 28 30 34
3386 1 16 28 33 39 45
3404 1 15 17 20 23 43
3413 1 3 31 32 39 49
3437 1 5 10 15 40 41
3440 1 10 18 19 23 27
3442 1 6 19 30 32 44
3451 1 16 17 24 41 46
3473 1 5 7 16 28 46
3481 1 3 8 9 12 40
3497 1 2 18 26 36 44
3525 1 23 29 32 45 49
3526 1 3 6 21 22 33
3551 1 7 27 31 34 39
3563 1 9 10 19 33 43
3565 1 5 9 12 39 41
3572 1 11 19 36 48 49
3596 1 3 25 40 44 47
3597 1 9 11 38 45 48
3620 1 6 7 10 31 49

426 rows × 6 columns

Ball #2


In [ ]:


In [8]:
# set
b2 = draws["NUMBER DRAWN 2"]

# describe
b2.describe()


Out[8]:
count    3672.000000
mean       14.558551
std         7.559001
min         2.000000
25%         9.000000
50%        14.000000
75%        20.000000
max        43.000000
Name: NUMBER DRAWN 2, dtype: float64

In [ ]:


In [9]:
# unique
b2vc = b2.value_counts()
b2vc.columns = ["value"]
b2vc


Out[9]:
12    194
8     189
10    184
7     181
9     179
13    178
15    178
14    172
11    165
17    162
16    159
5     152
6     150
20    133
19    129
18    127
21    125
4      98
22     96
3      94
24     84
23     82
26     67
25     63
27     50
29     49
2      47
28     43
30     40
32     26
31     21
33     13
35     11
34     11
36      4
37      4
38      3
40      3
39      3
43      1
41      1
42      1
Name: NUMBER DRAWN 2, dtype: int64

In [ ]:


In [10]:
b2vc.plot(kind='bar')


Out[10]:
<matplotlib.axes._subplots.AxesSubplot at 0x11f926ac8>

In [56]:
b2vc_vals = b2vc.index
b2vc_vals


Out[56]:
Int64Index([12,  8, 10,  7,  9, 13, 15, 14, 11, 17, 16,  5,  6, 20, 19, 18, 21,
             4, 22,  3, 24, 23, 26, 25, 27, 29,  2, 28, 30, 32, 31, 33, 35, 34,
            36, 37, 38, 40, 39, 43, 41, 42],
           dtype='int64')

In [ ]:


In [57]:
fig1, ax1 = plt.subplots()

explode = (0.05, 0)

ax1.pie(b2vc, autopct='%1.0f%%', labels=b2vc_vals, shadow=True, startangle=90)
ax1.axis('equal')
plt.legend(title="Legend")
plt.show()



In [ ]:


In [ ]:

Ball #3


In [ ]:


In [11]:
# set
b3 = draws["NUMBER DRAWN 3"]

# describe
b3.describe()


Out[11]:
count    3672.000000
mean       21.885349
std         8.177291
min         3.000000
25%        16.000000
50%        22.000000
75%        28.000000
max        45.000000
Name: NUMBER DRAWN 3, dtype: float64

In [ ]:


In [12]:
# unique
b3vc = b3.value_counts()
b3vc.columns = ["value"]
b3vc


Out[12]:
22    177
23    168
20    164
21    164
19    163
24    161
25    159
17    154
18    149
16    141
26    134
15    132
29    127
27    126
28    115
14    114
13    111
12    104
31    100
30    100
32     93
11     90
33     84
10     76
34     70
9      68
36     59
8      57
7      51
35     51
6      41
37     38
38     28
40     26
39     21
41     18
5      15
42      6
4       6
43      5
3       3
44      2
45      1
Name: NUMBER DRAWN 3, dtype: int64

In [ ]:


In [13]:
b3vc.plot(kind='bar')


Out[13]:
<matplotlib.axes._subplots.AxesSubplot at 0x11fea4f98>

In [54]:
b3vc_vals = b3vc.index
b3vc_vals


Out[54]:
Int64Index([22, 23, 20, 21, 19, 24, 25, 17, 18, 16, 26, 15, 29, 27, 28, 14, 13,
            12, 31, 30, 32, 11, 33, 10, 34,  9, 36,  8,  7, 35,  6, 37, 38, 40,
            39, 41,  5, 42,  4, 43,  3, 44, 45],
           dtype='int64')

In [ ]:


In [55]:
fig1, ax1 = plt.subplots()

explode = (0.05, 0)

ax1.pie(b3vc, autopct='%1.0f%%', labels=b3vc_vals, shadow=True, startangle=90)
ax1.axis('equal')
plt.legend(title="Legend")
plt.show()



In [ ]:


In [ ]:

Ball #4


In [ ]:


In [14]:
# set
b4 = draws["NUMBER DRAWN 4"]

# describe
b4.describe()


Out[14]:
count    3672.000000
mean       28.977397
std         8.082271
min         4.000000
25%        23.000000
50%        30.000000
75%        35.000000
max        47.000000
Name: NUMBER DRAWN 4, dtype: float64

In [ ]:


In [15]:
# unique
b4vc = b4.value_counts()
b4vc.columns = ["value"]
b4vc


Out[15]:
31    200
27    173
34    171
33    164
32    163
30    160
26    160
28    159
29    148
36    145
25    135
23    132
35    130
37    122
38    120
24    118
22    109
20    108
40    107
39    106
21     97
19     81
18     79
41     78
16     61
42     59
17     59
43     48
15     47
14     36
44     29
11     27
13     27
12     25
45     21
10     19
9      15
46     13
8      11
47      4
7       3
4       2
6       1
Name: NUMBER DRAWN 4, dtype: int64

In [ ]:


In [16]:
b4vc.plot(kind='bar')


Out[16]:
<matplotlib.axes._subplots.AxesSubplot at 0x11fe9a6d8>

In [60]:
b4vc_vals = b4vc.index
b4vc_vals


Out[60]:
Int64Index([31, 27, 34, 33, 32, 30, 26, 28, 29, 36, 25, 23, 35, 37, 38, 24, 22,
            20, 40, 39, 21, 19, 18, 41, 16, 42, 17, 43, 15, 14, 44, 11, 13, 12,
            45, 10,  9, 46,  8, 47,  7,  4,  6],
           dtype='int64')

In [ ]:


In [61]:
fig1, ax1 = plt.subplots()

explode = (0.05, 0)

ax1.pie(b4vc, autopct='%1.0f%%', labels=b4vc_vals, shadow=True, startangle=90)
ax1.axis('equal')
plt.legend(title="Legend")
plt.show()



In [ ]:

Ball #5


In [ ]:


In [17]:
# set
b5 = draws["NUMBER DRAWN 5"]

# describe
b5.describe()


Out[17]:
count    3672.000000
mean       36.156046
std         7.210188
min        11.000000
25%        31.000000
50%        37.000000
75%        42.000000
max        48.000000
Name: NUMBER DRAWN 5, dtype: float64

In [ ]:


In [18]:
# unique
b5vc = b5.value_counts()
b5vc.columns = ["value"]
b5vc


Out[18]:
38    211
41    207
40    203
43    192
37    190
34    186
39    185
42    184
36    166
44    165
35    164
45    161
33    143
31    140
46    138
32    128
30    115
27     94
29     88
28     86
47     83
24     63
26     62
25     55
48     46
23     40
22     37
21     35
20     28
19     23
17     15
18     11
14      9
13      6
16      6
12      3
11      2
15      2
Name: NUMBER DRAWN 5, dtype: int64

In [ ]:


In [19]:
b5vc.plot(kind='bar')


Out[19]:
<matplotlib.axes._subplots.AxesSubplot at 0x120835550>

In [62]:
b5vc_vals = b5vc.index
b5vc_vals


Out[62]:
Int64Index([38, 41, 40, 43, 37, 34, 39, 42, 36, 44, 35, 45, 33, 31, 46, 32, 30,
            27, 29, 28, 47, 24, 26, 25, 48, 23, 22, 21, 20, 19, 17, 18, 14, 13,
            16, 12, 11, 15],
           dtype='int64')

In [ ]:


In [63]:
fig1, ax1 = plt.subplots()

explode = (0.05, 0)

ax1.pie(b5vc, autopct='%1.0f%%', labels=b5vc_vals, shadow=True, startangle=90)
ax1.axis('equal')
plt.legend(title="Legend")
plt.show()



In [ ]:

Ball #6


In [ ]:


In [20]:
# set
b6 = draws["NUMBER DRAWN 6"]

# describe
b6.describe()


Out[20]:
count    3672.000000
mean       43.091503
std         5.522822
min        13.000000
25%        40.000000
50%        45.000000
75%        47.000000
max        49.000000
Name: NUMBER DRAWN 6, dtype: float64

In [ ]:


In [21]:
# unique
b6vc = b6.value_counts()
b6vc.columns = ["value"]
b6vc


Out[21]:
49    449
48    396
47    392
46    324
45    308
44    267
43    233
42    199
41    157
40    156
39    125
38    106
37     88
36     85
35     76
33     54
34     46
32     39
31     37
29     28
30     23
28     21
27     18
26     12
23      7
25      6
24      5
20      5
21      4
19      3
13      1
22      1
18      1
Name: NUMBER DRAWN 6, dtype: int64

In [ ]:


In [22]:
b6vc.plot(kind='bar')


Out[22]:
<matplotlib.axes._subplots.AxesSubplot at 0x120d5a4a8>

In [64]:
b6vc_vals = b6vc.index
b6vc_vals


Out[64]:
Int64Index([49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 33, 34,
            32, 31, 29, 30, 28, 27, 26, 23, 25, 24, 20, 21, 19, 13, 22, 18],
           dtype='int64')

In [ ]:


In [65]:
fig1, ax1 = plt.subplots()

explode = (0.05, 0)

ax1.pie(b6vc, autopct='%1.0f%%', labels=b6vc_vals, shadow=True, startangle=90)
ax1.axis('equal')
plt.legend(title="Legend")
plt.show()



In [ ]:


In [ ]:

Max Numbers


In [ ]:


In [66]:
total_counts = []
total_values = []
for index, column in enumerate(draws.columns):
    df = draws[column].value_counts().to_frame()
    df.columns = ["value"]
    val = df.iloc[0]
    total_counts.append(val.name)
    total_values.append(val.value)
    print("Most drawn number for %s is the number %s at %s times" % (draws.columns[index], str(val.name), str(val.item())) )


Most drawn number for NUMBER DRAWN 1 is the number 1 at 425 times
Most drawn number for NUMBER DRAWN 2 is the number 12 at 194 times
Most drawn number for NUMBER DRAWN 3 is the number 22 at 177 times
Most drawn number for NUMBER DRAWN 4 is the number 31 at 200 times
Most drawn number for NUMBER DRAWN 5 is the number 38 at 211 times
Most drawn number for NUMBER DRAWN 6 is the number 49 at 449 times

In [ ]:


In [ ]:


In [24]:
total_counts


Out[24]:
[1, 12, 22, 31, 38, 49]

In [ ]:


In [25]:
total_values


Out[25]:
[425, 194, 177, 200, 211, 449]

In [ ]:


In [26]:
index = ['Ball 1','Ball 2','Ball 3','Ball 4','Ball 5','Ball 6']
count = 'number chosen'
value = 'times chosen'
columns = [count,value]
df = pd.DataFrame(index=index, columns=columns)
df[count] = total_counts
df[value] = total_values

In [ ]:


In [27]:
df


Out[27]:
number chosen times chosen
Ball 1 1 425
Ball 2 12 194
Ball 3 22 177
Ball 4 31 200
Ball 5 38 211
Ball 6 49 449

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [2]:


In [ ]:


In [ ]:


In [ ]:


In [3]:


In [ ]:


In [ ]:


In [ ]: