
You will need to run run_me_first and calculate_tau first.

This script finds patterns in pi and tau by performing string searches. The search criteria are divided into four groups:

Group 1: patterns consisting of a string of single digits with a defined startpoint and variable length

  • repeated digits
  • digits consecutively increasing or decreasing by 1
  • the digits of pi, tau, e or root 2

Group 2: patterns consisting of a string of digits with a defined startpoint and variable internal length (e.g. for the primes, 2357 and 235711 would be positive matches but 23571 would not.)

  • the primes or the Fibonacci sequence (starting at 0 or 1)

Group 3: regex patterns

  • consecutive even, odd or prime numbers
  • consecutive digits not containing one particular digit
  • consecutive digits above or below the digit average of 4.5
  • consecutive binary digits (0 or 1)

Group 4: finding specific digits

  • Jenny's number (8675309)
  • The Sir Mix-A-Lot number (18007476492568)
  • All 10 digits in increasing or decreasing order
  • The Kennedy assassination date (112263)
  • The number of the beast (666); any string of 3 or more consecutive 6s counts as one match

To avoid missing matches that span a file break, the first 1000 digits of the next file are appended to the end of the digits of the file being searched.

Note: the dict that kept the results was buggy so I removed it and output everything to stdout and to a list saved as json, which can be parsed. Group 4, which keeps counts, not positions, is still saved as a dict.

In [1]:
# Switches for the four analysis groups, in order. Set any of them to False
# to skip that group when redoing only part of the analysis.
do_group_1, do_group_2, do_group_3, do_group_4 = True, True, True, True

import re
import json
import time

# Each test is a tuple of three elements: an abbreviation, a full description,
# and a tuple (or list) of search terms.
# Note: for the sequence tests, if a match as long as the whole search term
# is ever found, the search term will need to be lengthened.
#
# Group 1 terms are matched by prefix: the main loop searches for the first
# curr_max characters of each term (criterion[:curr_max]).
# NOTE(review): the 'rep' and 'consec' entries below end with an unclosed
# tuple in this copy of the file, so their remaining search terms appear to
# have been lost -- restore them from the original notebook before running.

group_1 = [
('rep', 'repeating digits', ('00000000000000000000',
('consec', 'consecutive increasing or decreasing digits', ('01234567890123456789',
('pi', 'the digits of pi', ['3141592653589793238462643']),
('tau', 'the digits of tau', ['6283185307179586476925286']),
('e', "the digits of Euler's number", ['2718281828459045235360287']),
('root2', 'the digits of the square root of two', ['1414213562373095048801688'])]

# Group 2 tests carry a fourth element (read by the main loop as test[3],
# `placelist`): the list of positions at which new members of the sequence
# start, so that only whole members are ever added to the search string.
# NOTE(review): all three entries below are cut off after the search-term
# list in this copy of the file -- the position lists and closing brackets
# are missing and must be restored from the original notebook.
group_2 = [('primes', 'the sequence of prime numbers', ['235711131719232931374143475359'], 
                             ('fib0', 'fibonacci sequence starting at 0',['01123581321345589144233'], 
                             ('fib1', 'fibonacci sequence starting at 1',['1123581321345589144233'], 

# Group 3: regex tests. Each test is (abbreviation, full description, search
# terms), where every search term is a regex matching ONE digit. The main
# loop appends a repeat count to the term (criterion + '{n}') to look for a
# run of n consecutive matching digits, so each term must be a single
# character class: a bare multi-character string would only have its last
# character repeated.
group_3 = [
    ('primed', 'prime digits', ['[2357]']),
    ('even', 'even digits', ['[02468]']),
    ('odd', 'odd digits', ['[13579]']),
    ('not', 'not containing a particular digit',
     ('[^0]', '[^1]', '[^2]', '[^3]', '[^4]',
      '[^5]', '[^6]', '[^7]', '[^8]', '[^9]')),
    ('5 or more', 'digits greater than average value of 4.5', ['[56789]']),
    ('4 or less', 'digits less than average value of 4.5', ['[01234]']),
    # Was '01', which with the appended count became '01{n}' (a zero followed
    # by n ones) instead of n consecutive binary digits.
    ('binary', 'zero or one', ['[01]']),
]

# Group 4: specific digit strings. Each entry is used as a regular expression
# and also serves as the key under which its result is stored.
group_4 = [
    '8675309',         # Jenny's number
    '112263',          # the Kennedy assassination date
    '18007476492568',  # the Sir Mix-A-Lot number
    '[^6]666',         # number of the beast; the leading [^6] makes a run of
                       # 3 or more 6s count once (the match starts one
                       # character before the first 6)
    '0123456789',      # all 10 digits in increasing order
    '9876543210',      # all 10 digits in decreasing order
]

In [2]:
# Main search: for each constant, walk through its ten digit files and run
# the enabled test groups on each one.
# NOTE(review): this cell uses the Python 2 print statement, and several
# statements are incomplete in this copy of the file (flagged individually
# below); restore them from the original notebook before running.
starttime = time.time()

# Flat list of [constant, test abbreviation, search term, position] entries
# appended by groups 1-3.
result = []

for constant in ['pi', 'tau']:

    # Per-test state for groups 1-3 is [curr_max, curr_pos]: the length (for
    # group 3, the repeat count) to search for next and the recorded position.
    # Groups 1 and 3 start at length 5, group 2 at length 4.
    group_1_results = {}
    for item in group_1:
        group_1_results[item[0]] = [5, 0]
    group_2_results = {}
    for item in group_2:
        group_2_results[item[0]] = [4, 0]
    group_3_results = {}
    for item in group_3:
        group_3_results[item[0]] = [5, 0]
    # Group 4 state is [start, count]: first match position and match count,
    # keyed by the search pattern itself.
    group_4_results = {}
    for item in group_4:
        group_4_results[item] = [0, 0]    
    # The digits are read from ten files per constant, suffixed .000 to .009.
    for i in range(10):
        infilename = 'data/' + constant + '100m.dectxt.00%d' % (i)
        with open(infilename, 'r') as fin:
            # NOTE(review): the right-hand side is missing in this copy;
            # presumably the file contents are read here -- confirm.
            digits =
        print "processing", infilename
        # For every file but the last, append the companion '.1K' file of the
        # next file so that matches spanning a file break are not missed.
        if i < 9:
            next1K = 'data/' + constant + '100m.dectxt.00%d.1K' % (i+1)
            with open(next1K, 'r') as fin2:
                # NOTE(review): right-hand side missing here as well.
                moredigits =
            digits += moredigits
        # Group 1: search for ever longer prefixes of each search term.
        if do_group_1:
            for test in group_1:
                #print 'testing ',test[1],
                # NOTE(review): `name` and `criteria` are never assigned in
                # the visible code; presumably test[0] and test[2] -- confirm.
                curr_max = group_1_results[name][0]
                curr_pos = group_1_results[name][1]
                orig_max = curr_max
                for criterion in criteria:
                    found = True
                    while found == True:
                        to_search = criterion[:curr_max]
                        found_pos = digits.find(to_search)
                        # Only matches starting before index 100000000 are
                        # accepted (later ones presumably lie in the digits
                        # appended from the next file -- confirm file size).
                        if found_pos != -1 and found_pos < 100000000:
                            # Convert the in-file index to a global position.
                            found_pos += i * 100000000
                            result.append([constant, test[0], to_search, found_pos])
                            if curr_max == orig_max: #if a search of the same length but at earlier position is found, it becomes the frontrunner
                                if found_pos < curr_pos or curr_pos == 0: # I suspect this is the buggy part
                                    curr_pos = found_pos
                                # NOTE(review): this unconditional assignment
                                # overrides the conditional one just above.
                                curr_pos = found_pos
                            curr_max += 1
                            # NOTE(review): as written this sits inside the
                            # success branch, so a failed search never ends
                            # the while loop; an `else:` appears to be lost.
                            found = False
                    group_1_results[name][0] = curr_max
                    group_1_results[name][1] = curr_pos
                    #print ' '
        # Group 2: like group 1, but the search length only advances to the
        # lengths listed in placelist (test[3]).
        if do_group_2:
            for test in group_2:
                #print 'testing ',test[1],
                placelist = test[3]
                curr_max = group_2_results[name][0]
                curr_pos = group_2_results[name][1]
                orig_max = curr_max
                for criterion in criteria:
                    found = True
                    while found == True:
                        to_search = criterion[:curr_max]
                        found_pos = digits.find(to_search)
                        if found_pos != -1 and found_pos < 100000000:
                            found_pos += i * 100000000
                            result.append([constant, test[0], to_search, found_pos])
                            if curr_max == orig_max:
                                if found_pos < curr_pos or curr_pos == 0:
                                    curr_pos = found_pos
                                curr_pos = found_pos
                            curr_max += 1
                            # Skip ahead to the next length in placelist.
                            while curr_max not in placelist:
                                curr_max += 1
                                # NOTE(review): the body of this `if` is
                                # missing in this copy (presumably it stops
                                # the skip-ahead loop -- confirm).
                                if curr_max >25:
                            found = False
                    group_2_results[name][0] = curr_max
                    group_2_results[name][1] = curr_pos
                    #print ' '
        # Group 3: regex search for curr_max consecutive characters matching
        # the criterion (the pattern is criterion + '{curr_max}').
        if do_group_3:
            for test in group_3:
                #print 'testing ',test[1],
                curr_max = group_3_results[name][0]
                curr_pos = group_3_results[name][1]
                orig_max = curr_max
                for criterion in criteria:
                    found = True
                    while found == True:
                        to_search = criterion + '{%s}' % (curr_max)
                        # NOTE(review): this statement is garbled in this
                        # copy; presumably a regex search of `digits` for
                        # to_search -- confirm.
                        regexobj =, digits)
                        if regexobj:
                            found_pos = regexobj.start()
                            if found_pos < 100000000:
                                found_pos += i * 100000000
                                result.append([constant, test[0], to_search, found_pos])
                                if curr_max == orig_max:
                                    if found_pos < curr_pos or curr_pos == 0:
                                        curr_pos = found_pos
                                    curr_pos = found_pos
                                curr_max += 1
                                found = False
                            found = False
                    group_3_results[name][0] = curr_max
                    group_3_results[name][1] = curr_pos
                    #print ' '
        # Group 4: count every regex match starting before index 100000000
        # and remember where the first one starts.
        if do_group_4:  
            for item in group_4:
                #print 'testing ',item
                start = group_4_results[item][0]
                count = group_4_results[item][1]
                reitobj = re.finditer(item,digits)
                # The count is restarted for each file; the running total is
                # accumulated in group_4_results below.
                count = 0
                for refobj in reitobj:
                    if count==0 and refobj.start() < 100000000:
                        start = refobj.start()
                    if refobj.start() < 100000000:
                        count += 1
                if count > 0:
                    start += i * 100000000
                    # A stored start of 0 means no first position has been
                    # recorded for this pattern yet.
                    if group_4_results[item][0] == 0:
                        group_4_results[item][0] = start
                    group_4_results[item][1] += count
    #     print '\n==='
    #     print group_1_results # note all lengths are +1 due to fencepost counting
    #     print group_2_results
    #     print group_3_results
    #     print group_4_results
    #     print "Total elapsed time: %0.1f minutes." % ((time.time() - starttime)/60)
    #     print '\n=================================================\n\n'
    # NOTE(review): the bodies of both `with` statements below are missing in
    # this copy; per the notes at the top of the file, `result` and the group 4
    # dict are presumably written out as JSON here -- confirm.
    with open(constant+'_search_results', 'w+') as f3:
    if do_group_4:
        with open(constant+'_search_results_4', 'w+') as f4:

processing data/pi100m.dectxt.000
processing data/pi100m.dectxt.001
processing data/pi100m.dectxt.002
processing data/pi100m.dectxt.003
processing data/pi100m.dectxt.004
processing data/pi100m.dectxt.005
processing data/pi100m.dectxt.006
processing data/pi100m.dectxt.007
processing data/pi100m.dectxt.008
processing data/pi100m.dectxt.009
processing data/tau100m.dectxt.000
processing data/tau100m.dectxt.001
processing data/tau100m.dectxt.002
processing data/tau100m.dectxt.003
processing data/tau100m.dectxt.004
processing data/tau100m.dectxt.005
processing data/tau100m.dectxt.006
processing data/tau100m.dectxt.007
processing data/tau100m.dectxt.008
processing data/tau100m.dectxt.009

