Before running this script, you will need to run run_me_first and calculate_tau.
This script finds patterns in pi and tau by performing string searches. The search criteria are divided into four groups:
Group 1: patterns consisting of a string of single digits with a defined startpoint and variable length
Group 2: patterns consisting of a concatenated sequence of numbers with a defined startpoint, where a match may only end on a complete member of the sequence (e.g. for the primes, 2357 and 235711 would be positive matches but 23571 would not, because it stops halfway through 11.)
Group 3: regex patterns
Group 4: finding specific digit strings, recording where each first occurs and how many times it occurs
To avoid missing matches that span a file break, the first 1000 digits from the next file are added to the end of the digits read from the current file.
In [1]:
# Switches for the four search groups described at the top of this script.
# In case you want to redo only parts of the analyses, you can change any of
# these to False; the corresponding group is then skipped for every data file.
do_group_1 = True  # fixed digit strings matched to ever greater length
do_group_2 = True  # number sequences that must end on a complete member
do_group_3 = True  # regex character classes (runs of matching digits)
do_group_4 = True  # first position and count of specific patterns
import re
import json
import time
################################################################################
# DEFINE SEARCH TERMS
# Tests in groups 1-3 have three elements: abbreviation, full description, and
# a tuple (or list) of search terms. Group 2 tests carry a fourth element,
# described below.
# Note that if a match as long as the search term appears for sequences,
# the search term will need to be lengthened.

# Group 1: the search looks for ever longer prefixes of each term.
group_1 = [
    ('rep', 'repeating digits', (
        '00000000000000000000',
        '11111111111111111111',
        '22222222222222222222',
        '33333333333333333333',
        '44444444444444444444',
        '55555555555555555555',
        '66666666666666666666',
        '77777777777777777777',
        '88888888888888888888',
        '99999999999999999999',
    )),
    # Ten increasing runs (one per starting digit) followed by ten decreasing
    # runs. Each term appears once: a repeated term only costs an extra scan.
    ('consec', 'consecutive increasing or decreasing digits', (
        '01234567890123456789',
        '12345678901234567890',
        '23456789012345678901',
        '34567890123456789012',
        '45678901234567890123',
        '56789012345678901234',
        '67890123456789012345',
        '78901234567890123456',
        '89012345678901234567',
        '90123456789012345678',
        '98765432109876543210',
        '87654321098765432109',
        '76543210987654321098',
        '65432109876543210987',
        '54321098765432109876',
        '43210987654321098765',
        '32109876543210987654',
        '21098765432109876543',
        '10987654321098765432',
        '09876543210987654321',
    )),
    ('pi', 'the digits of pi', ['3141592653589793238462643']),
    ('tau', 'the digits of tau', ['6283185307179586476925286']),
    ('e', "the digits of Euler's number", ['2718281828459045235360287']),
    ('root2', 'the digits of the square root of two',
     ['1414213562373095048801688']),
]

# Group 2: the last list holds the prefix lengths at which the search term
# ends on a complete member of the sequence (e.g. for the primes, length 6 is
# '235711'). Only these lengths are searched for.
group_2 = [
    ('primes', 'the sequence of prime numbers',
     ['235711131719232931374143475359'],
     [1, 2, 3, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22]),
    # 0 1 1 2 3 5 8 13 21 34 55 89 144 233: '144' ends at length 20.
    ('fib0', 'fibonacci sequence starting at 0',
     ['01123581321345589144233'],
     [1, 2, 3, 4, 5, 6, 7, 9, 11, 13, 15, 17, 20, 23]),
    ('fib1', 'fibonacci sequence starting at 1',
     ['1123581321345589144233'],
     [1, 2, 3, 4, 5, 6, 8, 10, 12, 14, 16, 19, 22]),
]

# Group 3: each term is a regex character class; the search appends a '{n}'
# repetition count to it to look for runs of n matching characters.
group_3 = [
    ('primed', 'prime digits', ['[2357]']),
    ('even', 'even digits', ['[02468]']),
    ('odd', 'odd digits', ['[13579]']),
    ('not', 'not containing a particular digit', (
        '[^0]', '[^1]', '[^2]', '[^3]', '[^4]',
        '[^5]', '[^6]', '[^7]', '[^8]', '[^9]',
    )),
    ('5 or more', 'digits greater than average value of 4.5', ['[56789]']),
    ('4 or less', 'digits less than average value of 4.5', ['[01234]']),
    # Must be a character class: a bare '01' would turn into '01{n}', i.e. a
    # single zero followed by n ones.
    ('binary', 'zero or one', ['[01]']),
]

# Group 4: patterns whose first position and number of occurrences are
# recorded. They are used as regular expressions, so for '[^6]666' the
# reported position is that of the non-6 character preceding the 666.
group_4 = [
    '8675309',
    '112263',
    '18007476492568',
    '[^6]666',
    '0123456789',
    '9876543210',
]
In [2]:
starttime = time.time()
result = []
for constant in ['pi', 'tau']:
group_1_results = {}
for item in group_1:
group_1_results[item[0]] = [5, 0]
group_2_results = {}
for item in group_2:
group_2_results[item[0]] = [4, 0]
group_3_results = {}
for item in group_3:
group_3_results[item[0]] = [5, 0]
group_4_results = {}
for item in group_4:
group_4_results[item] = [0, 0]
for i in range(10):
infilename = 'data/' + constant + '100m.dectxt.00%d' % (i)
with open(infilename, 'r') as fin:
digits = fin.read()
print "processing", infilename
if i < 9:
next1K = 'data/' + constant + '100m.dectxt.00%d.1K' % (i+1)
with open(next1K, 'r') as fin2:
moredigits = fin2.read()
digits += moredigits
###############################################################
if do_group_1:
for test in group_1:
#print 'testing ',test[1],
name=test[0]
criteria=test[2]
curr_max = group_1_results[name][0]
curr_pos = group_1_results[name][1]
orig_max = curr_max
for criterion in criteria:
found = True
while found == True:
to_search = criterion[:curr_max]
found_pos = digits.find(to_search)
if found_pos != -1 and found_pos < 100000000:
found_pos += i * 100000000
result.append([constant, test[0], to_search, found_pos])
if curr_max == orig_max: #if a search of the same length but at earlier position is found, it becomes the frontrunner
if found_pos < curr_pos or curr_pos == 0: # I suspect this is the buggy part
curr_pos = found_pos
else:
curr_pos = found_pos
curr_max += 1
else:
found = False
group_1_results[name][0] = curr_max
group_1_results[name][1] = curr_pos
#print ' '
#################################################################
if do_group_2:
for test in group_2:
#print 'testing ',test[1],
name=test[0]
criteria=test[2]
placelist = test[3]
curr_max = group_2_results[name][0]
curr_pos = group_2_results[name][1]
orig_max = curr_max
for criterion in criteria:
found = True
while found == True:
to_search = criterion[:curr_max]
found_pos = digits.find(to_search)
if found_pos != -1 and found_pos < 100000000:
found_pos += i * 100000000
result.append([constant, test[0], to_search, found_pos])
if curr_max == orig_max:
if found_pos < curr_pos or curr_pos == 0:
curr_pos = found_pos
else:
curr_pos = found_pos
curr_max += 1
while curr_max not in placelist:
curr_max += 1
if curr_max >25:
break
else:
found = False
group_2_results[name][0] = curr_max
group_2_results[name][1] = curr_pos
#print ' '
############################################################
if do_group_3:
for test in group_3:
#print 'testing ',test[1],
name=test[0]
criteria=test[2]
curr_max = group_3_results[name][0]
curr_pos = group_3_results[name][1]
orig_max = curr_max
for criterion in criteria:
found = True
while found == True:
to_search = criterion + '{%s}' % (curr_max)
regexobj = re.search(to_search, digits)
if regexobj:
found_pos = regexobj.start()
if found_pos < 100000000:
found_pos += i * 100000000
result.append([constant, test[0], to_search, found_pos])
if curr_max == orig_max:
if found_pos < curr_pos or curr_pos == 0:
curr_pos = found_pos
else:
curr_pos = found_pos
curr_max += 1
else:
found = False
else:
found = False
group_3_results[name][0] = curr_max
group_3_results[name][1] = curr_pos
#print ' '
##############################################################
if do_group_4:
for item in group_4:
#print 'testing ',item
start = group_4_results[item][0]
count = group_4_results[item][1]
reitobj = re.finditer(item,digits)
count = 0
for refobj in reitobj:
if count==0 and refobj.start() < 100000000:
start = refobj.start()
if refobj.start() < 100000000:
count += 1
if count > 0:
start += i * 100000000
if group_4_results[item][0] == 0:
group_4_results[item][0] = start
group_4_results[item][1] += count
#######################################################
# print '\n==='
# print group_1_results # note all lengths are +1 due to fencepost counting
# print group_2_results
# print group_3_results
# print group_4_results
# print "Total elapsed time: %0.1f minutes." % ((time.time() - starttime)/60)
# print '\n=================================================\n\n'
with open(constant+'_search_results', 'w+') as f3:
f3.write(json.dumps(result))
if do_group_4:
with open(constant+'_search_results_4', 'w+') as f4:
f4.write(json.dumps(group_4_results))
In [ ]: