In [14]:
import sys
import pandas as pd
import datetime as dt
import urllib.request
import zipfile
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (20.0, 10.0)
In [2]:
# Main Getter Function
# Download
def dl_file(url, data_path, file_name):
    """Download ``url`` and save it as ``data_path + file_name``.

    Parameters
    ----------
    url : str
        Location of the file to fetch.
    data_path : str
        Directory prefix for the saved file; it is concatenated with
        ``file_name`` as-is, so it should end with a path separator.
    file_name : str
        Name to give the saved file.

    The target directory is created when it does not exist yet, so the
    notebook also runs on a fresh checkout without a ``data/`` folder.
    """
    import os  # local import: the notebook's import cell does not bring in os

    target = data_path + file_name
    target_dir = os.path.dirname(target)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    print("### Downloading " + url + " and saving as " + target)
    urllib.request.urlretrieve(url, target)
# Unzip
def unzip(data_path, file_name):
    """Extract every member of the archive ``data_path + file_name`` into ``data_path``.

    Parameters
    ----------
    data_path : str
        Directory prefix holding the archive; also the extraction target.
    file_name : str
        Name of the archive inside ``data_path``.
    """
    print("### Unzipping " + file_name + " at " + data_path)
    # The context manager closes the archive even if extraction fails.
    with zipfile.ZipFile(data_path + file_name, 'r') as zip_ref:
        zip_ref.extractall(data_path)
# Download settings
# TODO - make abstract to handle other sets
data_path = "data/"
file_name = "649"
url = "http://www.bclc.com/documents/DownloadableNumbers/CSV/649.zip"

# Fetch the zip archive, then extract its contents into the same folder.
dl_file(url=url, data_path=data_path, file_name=file_name)
unzip(data_path=data_path, file_name=file_name)
In [3]:
# Load the extracted CSV and index the rows by draw number.
csv_path = data_path + file_name + ".csv"
base_reader = pd.read_csv(csv_path).set_index("DRAW NUMBER")
base_reader.tail()
Out[3]:
In [ ]:
In [4]:
# Keep only the six main ball columns (label slice, both ends included).
columns = base_reader.loc[:, 'NUMBER DRAWN 1':'NUMBER DRAWN 6'].columns.tolist()
draws = base_reader[columns]
draws.tail()
Out[4]:
In [5]:
# Ball 1 column on its own, followed by its summary statistics.
b1 = draws.loc[:, "NUMBER DRAWN 1"]
b1.describe()
Out[5]:
In [6]:
# How often each number was drawn as ball 1.
# value_counts() returns a Series, which has no `columns`; the former
# `b1vc.columns = [...]` assignment only attached a stray attribute.
b1vc = b1.value_counts()
b1vc
Out[6]:
In [7]:
# Bar chart of the ball 1 counts.
b1vc.plot.bar()
Out[7]:
In [8]:
# The drawn numbers themselves (the index of the counts), used as pie labels.
b1vc_vals = b1vc.keys()
b1vc_vals
Out[8]:
In [11]:
# Pie chart of the ball 1 counts, one labelled wedge per number.
# The unused `explode` tuple was removed: it was never passed to pie().
fig1, ax1 = plt.subplots()
ax1.pie(b1vc, autopct='%1.0f%%', labels=b1vc_vals, shadow=True, startangle=90)
ax1.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
ax1.legend(title="Legend")
plt.show()
In [16]:
# Keep every column from 'DRAW DATE' through 'NUMBER DRAWN 1' (label slice, both ends included).
draw_dates_1_list = base_reader.loc[:, 'DRAW DATE':'NUMBER DRAWN 1'].columns.tolist()
draw_dates_1 = base_reader.loc[:, draw_dates_1_list]
draw_dates_1.tail()
Out[16]:
In [19]:
# Filter to 2018 Draws
# Parse the dates rather than comparing strings against '2018-0-0': that is
# not a valid date, and a lexical '>' also lets through every later year.
# NOTE(review): assumes 'DRAW DATE' holds values pd.to_datetime can parse - confirm.
draw_year = pd.to_datetime(draw_dates_1['DRAW DATE']).dt.year
draw_dates_1_2018 = draw_dates_1[draw_year == 2018]
draw_dates_1_2018.head()
Out[19]:
In [22]:
# 2018 draws in which the first ball was the number 1.
is_number_one = draw_dates_1_2018['NUMBER DRAWN 1'].eq(1)
draw_dates_ball_one_number_one = draw_dates_1_2018.loc[is_number_one]
draw_dates_ball_one_number_one
Out[22]:
In [37]:
# Let's see what percent of the 2018 draws had the number 1 drawn as Ball #1
In [38]:
# Percentage of the 2018 draws whose first ball was the number 1:
# matching draws divided by all 2018 draws (the original had this inverted).
percent_ball_one = 100 * len(draw_dates_ball_one_number_one.index) / len(draw_dates_1_2018.index)
percent_ball_one
Out[38]:
In [39]:
# This seems to match with our results from Cell 11... let's verify
In [53]:
# ball_one_number_one_list = list(base_reader.loc[:,'NUMBER DRAWN 1'])
# ball_one_number_one = base_reader[ball_one_number_one_list]
# ball_one_number_one
In [45]:
# Cross-check over every draw, not just 2018: rows whose first ball was the
# number 1, as a percentage of all rows in `draws`. The original divided the
# 2018 row count by the all-time match count, mixing two different populations.
ball_one_number_one_draws = draws[(draws['NUMBER DRAWN 1'] == 1)]
percent_ball_one = 100 * len(ball_one_number_one_draws.index) / len(draws.index)
percent_ball_one
Out[45]:
In [ ]:
In [8]:
# Ball 2 column on its own, followed by its summary statistics.
b2 = draws.loc[:, "NUMBER DRAWN 2"]
b2.describe()
Out[8]:
In [ ]:
In [9]:
# How often each number was drawn as ball 2.
# value_counts() returns a Series, which has no `columns`; the former
# `b2vc.columns = [...]` assignment only attached a stray attribute.
b2vc = b2.value_counts()
b2vc
Out[9]:
In [ ]:
In [10]:
# Bar chart of the ball 2 counts.
b2vc.plot.bar()
Out[10]:
In [56]:
# The drawn numbers themselves (the index of the counts), used as pie labels.
b2vc_vals = b2vc.keys()
b2vc_vals
Out[56]:
In [ ]:
In [57]:
# Pie chart of the ball 2 counts, one labelled wedge per number.
# The unused `explode` tuple was removed: it was never passed to pie().
fig1, ax1 = plt.subplots()
ax1.pie(b2vc, autopct='%1.0f%%', labels=b2vc_vals, shadow=True, startangle=90)
ax1.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
ax1.legend(title="Legend")
plt.show()
In [ ]:
In [ ]:
In [ ]:
In [11]:
# Ball 3 column on its own, followed by its summary statistics.
b3 = draws.loc[:, "NUMBER DRAWN 3"]
b3.describe()
Out[11]:
In [ ]:
In [12]:
# How often each number was drawn as ball 3.
# value_counts() returns a Series, which has no `columns`; the former
# `b3vc.columns = [...]` assignment only attached a stray attribute.
b3vc = b3.value_counts()
b3vc
Out[12]:
In [ ]:
In [13]:
# Bar chart of the ball 3 counts.
b3vc.plot.bar()
Out[13]:
In [54]:
# The drawn numbers themselves (the index of the counts), used as pie labels.
b3vc_vals = b3vc.keys()
b3vc_vals
Out[54]:
In [ ]:
In [55]:
# Pie chart of the ball 3 counts, one labelled wedge per number.
# The unused `explode` tuple was removed: it was never passed to pie().
fig1, ax1 = plt.subplots()
ax1.pie(b3vc, autopct='%1.0f%%', labels=b3vc_vals, shadow=True, startangle=90)
ax1.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
ax1.legend(title="Legend")
plt.show()
In [ ]:
In [ ]:
In [ ]:
In [14]:
# Ball 4 column on its own, followed by its summary statistics.
b4 = draws.loc[:, "NUMBER DRAWN 4"]
b4.describe()
Out[14]:
In [ ]:
In [15]:
# How often each number was drawn as ball 4.
# value_counts() returns a Series, which has no `columns`; the former
# `b4vc.columns = [...]` assignment only attached a stray attribute.
b4vc = b4.value_counts()
b4vc
Out[15]:
In [ ]:
In [16]:
# Bar chart of the ball 4 counts.
b4vc.plot.bar()
Out[16]:
In [60]:
# The drawn numbers themselves (the index of the counts), used as pie labels.
b4vc_vals = b4vc.keys()
b4vc_vals
Out[60]:
In [ ]:
In [61]:
# Pie chart of the ball 4 counts, one labelled wedge per number.
# The unused `explode` tuple was removed: it was never passed to pie().
fig1, ax1 = plt.subplots()
ax1.pie(b4vc, autopct='%1.0f%%', labels=b4vc_vals, shadow=True, startangle=90)
ax1.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
ax1.legend(title="Legend")
plt.show()
In [ ]:
In [ ]:
In [17]:
# Ball 5 column on its own, followed by its summary statistics.
b5 = draws.loc[:, "NUMBER DRAWN 5"]
b5.describe()
Out[17]:
In [ ]:
In [18]:
# How often each number was drawn as ball 5.
# value_counts() returns a Series, which has no `columns`; the former
# `b5vc.columns = [...]` assignment only attached a stray attribute.
b5vc = b5.value_counts()
b5vc
Out[18]:
In [ ]:
In [19]:
# Bar chart of the ball 5 counts.
b5vc.plot.bar()
Out[19]:
In [62]:
# The drawn numbers themselves (the index of the counts), used as pie labels.
b5vc_vals = b5vc.keys()
b5vc_vals
Out[62]:
In [ ]:
In [63]:
# Pie chart of the ball 5 counts, one labelled wedge per number.
# The unused `explode` tuple was removed: it was never passed to pie().
fig1, ax1 = plt.subplots()
ax1.pie(b5vc, autopct='%1.0f%%', labels=b5vc_vals, shadow=True, startangle=90)
ax1.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
ax1.legend(title="Legend")
plt.show()
In [ ]:
In [ ]:
In [20]:
# Ball 6 column on its own, followed by its summary statistics.
b6 = draws.loc[:, "NUMBER DRAWN 6"]
b6.describe()
Out[20]:
In [ ]:
In [21]:
# How often each number was drawn as ball 6.
# value_counts() returns a Series, which has no `columns`; the former
# `b6vc.columns = [...]` assignment only attached a stray attribute.
b6vc = b6.value_counts()
b6vc
Out[21]:
In [ ]:
In [22]:
# Bar chart of the ball 6 counts.
b6vc.plot.bar()
Out[22]:
In [64]:
# The drawn numbers themselves (the index of the counts), used as pie labels.
b6vc_vals = b6vc.keys()
b6vc_vals
Out[64]:
In [ ]:
In [65]:
# Pie chart of the ball 6 counts, one labelled wedge per number.
# The unused `explode` tuple was removed: it was never passed to pie().
fig1, ax1 = plt.subplots()
ax1.pie(b6vc, autopct='%1.0f%%', labels=b6vc_vals, shadow=True, startangle=90)
ax1.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
ax1.legend(title="Legend")
plt.show()
In [ ]:
In [ ]:
In [ ]:
In [66]:
# For every ball column, find the number drawn most often and how many times.
# Note the naming: total_counts collects the winning NUMBERS, total_values
# collects how often each of them was drawn.
total_counts = []
total_values = []
for column in draws.columns:
    tally = draws[column].value_counts()
    top_number = tally.index[0]  # first entry of value_counts() is the most frequent
    top_count = tally.iloc[0]
    total_counts.append(top_number)
    total_values.append(top_count)
    print("Most drawn number for %s is the number %s at %s times" % (column, str(top_number), str(top_count)) )
In [ ]:
In [ ]:
In [24]:
# Most frequently drawn number for each ball column (despite the name, these are the numbers, not counts).
total_counts
Out[24]:
In [ ]:
In [25]:
# How many times each ball column's most frequent number was drawn, in the same column order.
total_values
Out[25]:
In [ ]:
In [26]:
# Summary table: one row per ball, with its most-drawn number and how often it came up.
index = ['Ball 1','Ball 2','Ball 3','Ball 4','Ball 5','Ball 6']
count = 'number chosen'
value = 'times chosen'
columns = [count, value]
df = pd.DataFrame({count: total_counts, value: total_values}, index=index)
In [ ]:
In [27]:
# Show the per-ball summary table ('number chosen' / 'times chosen' for Ball 1 to Ball 6).
df
Out[27]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [2]:
In [ ]:
In [ ]:
In [ ]:
In [3]:
In [ ]:
In [ ]:
In [ ]: