In [20]:
from typing import List, Iterator, NamedTuple, Dict
from enum import Enum, auto
from io import StringIO

class BFError(Exception):
    pass

Filling the Swear Jar

A tale of three languages

Alec Reiter (@justanr)

Brainfuck

  • Urban Mueller, 1993
  • Turning Complete Tarpit
  • 8 commands
  • Tape, Tape Pointer, Instruction Pointer
++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]>>.>---.+++++++..+++.>>.<-.<.+++.------.--------.>>+.>++.

Why the...

  • Rust! (but we'll get to that)
  • Different
  • Oddly fun

The 8 Commands

Command Meaning
+ Incr Cell
- Decr Cell
> Move Right
< Move Left
[ Conditional Jump (if cell is 0)
] Conditional Jump (if cell isn't 0)
. Output Cell
, Read into Cell

Common Constructs

  • [-] set current cell to 0
  • [->+<] add current cell to another
  • [->++<] multiplication
  • [<] find last zero cell

Ambiguities

  • "Infinite tape" -- reference impl uses 30,000 cells
  • How big are cells? -- u8 or u32? or signed?

So, implementations...

Turns out I have no idea what I'm doing

Python to the rescue

First Attempt


In [2]:
def run(prog: str, stdin: str="") -> StringIO:
    stdout = StringIO()
    memory = [0] * 30_000
    memptr = 0
    instrptr = 0
    progsize = len(prog)
    # stores the location of the last [ s we encountered
    brackets = []
    
    while instrptr < progsize:
        op = progsize[instrptr]
        instrptr += 1
        
        if op == '+':
            memory[memptr] += 1
        elif op == '-':
            memory[memptr] -= 1
        # and so on
        else: 
            # not a BF command
            pass
    
    stdout.seek(0)
    return stdout

Pros

  • Very simple
  • Jumping back is easy

Cons

  • Very naive
  • Jumping forward isn't easy
  • Incorrect programs not detected

Parsing


In [3]:
class BFToken(Enum):
    Incr = '+'
    Decr = '-'
    MoveL = '<'
    MoveR = '>'
    StdIn = ','
    StdOut = '.'
    JumpF = '['
    JumpB = ']'

In [4]:
partners = {
    BFToken.Incr: BFToken.Decr,
    BFToken.Decr: BFToken.Incr,
    BFToken.MoveL: BFToken.MoveR,
    BFToken.MoveR: BFToken.MoveL
}

In [5]:
def _parse(prog: str) -> Iterator[BFToken]:
    for char in prog:
        try:
            yield BFToken(char)
        except ValueError:
            pass

def parse(prog: str) -> List[BFToken]:
    return list(_parse(prog))

In [6]:
parse('++a+--')


Out[6]:
[<BFToken.Incr: '+'>,
 <BFToken.Incr: '+'>,
 <BFToken.Incr: '+'>,
 <BFToken.Decr: '-'>,
 <BFToken.Decr: '-'>]

Optimizing

  • Jump table
  • Combine like tokens

In [7]:
def collapse(prog: List[BFToken]) -> List[BFToken]:
    program = []
    
    for token in prog:
        ...
        # uh wait a second

Missing Something


In [8]:
class IRToken(NamedTuple):
    token: BFToken
    amount: int

In [9]:
def collapse(prog: List[BFToken]) -> List[IRToken]:
    program: List[IRToken] = []
    
    for token in prog:
        if len(program) == 0 or token not in partners:
            program.append(IRToken(token, 1))
            continue
        
        previous = program.pop()
        
        if previous.token == token:
            new_token = previous._replace(amount=previous.amount+1)
            if new_token.amount != 0:
                program.append(new_token)
        
        elif previous.token == partners[token]:
            new_token = previous._replace(amount=previous.amount-1)
            if new_token.amount != 0:
                program.append(new_token)
        
        else:
            program.append(previous)
            program.append(IRToken(token, 1))
            
    return program

In [10]:
def build_jump_table(prog: List[IRToken]):
    brackets = []
    
    for idx, token in enumerate(prog, 0):
        if token.token == BFToken.JumpF:
            brackets.append(idx)
            
        elif token.token == BFToken.JumpB:
            try:
                partner = brackets.pop()
            except IndexError:
                raise BFError(f"Unmatched bracket at: {idx}") from None
            else:
                prog[idx] = prog[idx]._replace(amount=partner)
                prog[partner] = prog[partner]._replace(amount=idx)
    
    if brackets:
        raise BFError(f"Unmatched brackets at: {', '.join([str(x) for x in brackets])}")

In [11]:
tokens = collapse(parse('++[->++++++++<]'))
build_jump_table(tokens)
tokens


Out[11]:
[IRToken(token=<BFToken.Incr: '+'>, amount=2),
 IRToken(token=<BFToken.JumpF: '['>, amount=6),
 IRToken(token=<BFToken.Decr: '-'>, amount=1),
 IRToken(token=<BFToken.MoveR: '>'>, amount=1),
 IRToken(token=<BFToken.Incr: '+'>, amount=8),
 IRToken(token=<BFToken.MoveL: '<'>, amount=1),
 IRToken(token=<BFToken.JumpB: ']'>, amount=1)]

In [31]:
def run(prog: List[IRToken], stdin: str="") -> StringIO:
    stdout = StringIO()
    stdiniter = iter(stdin)
    getc = lambda: ord(next(stdiniter, '\0'))
    putc = lambda: stdout.write(chr(memory[memptr]))
    memory = [0] * 30_000
    memptr = 0
    instrptr = 0
    proglength = len(prog)
    
    while instrptr < proglength:
        op = prog[instrptr]
        
        if op.token == BFToken.StdOut:
            putc()
        
        elif op.token == BFToken.StdIn:
            memory[memptr] = getc()
            
        elif op.token == BFToken.Incr:
            memory[memptr] += op.amount
        
        elif op.token == BFToken.Decr:
            memory[memptr] -= op.amount
        
        elif op.token == BFToken.MoveL:
            memptr = (memptr - op.amount) % 30_000
        
        elif op.token == BFToken.MoveR:
            memptr = (memptr + op.amount) % 30_000
        
        elif op.token == BFToken.JumpF:
            if memory[memptr] == 0:
                instrptr = op.amount
        
        elif op.token == BFToken.JumpB:
            if memory[memptr] != 0:
                instrptr = op.amount
        
        instrptr += 1
    
    stdout.seek(0)
    return stdout

def bf(source: str, stdin: str="") -> StringIO:
    prog = collapse(parse(source))
    build_jump_table(prog)
    return run(prog, stdin)

In [14]:
%%time
print(bf("++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]>>.>---.+++++++..+++.>>.<-.<.+++.------.--------.>>+.>++.").read())


Hello World!

CPU times: user 11.7 ms, sys: 3.42 ms, total: 15.1 ms
Wall time: 15.3 ms

In [16]:
triangle = """
                                >
                               + +
                              +   +
                             [ < + +
                            +       +
                           + +     + +
                          >   -   ]   >
                         + + + + + + + +
                        [               >
                       + +             + +
                      <   -           ]   >
                     > + + >         > > + >
                    >       >       +       <
                   < <     < <     < <     < <
                  <   [   -   [   -   >   +   <
                 ] > [ - < + > > > . < < ] > > >
                [                               [
               - >                             + +
              +   +                           +   +
             + + [ >                         + + + +
            <       -                       ]       >
           . <     < [                     - >     + <
          ]   +   >   [                   -   >   +   +
         + + + + + + + +                 < < + > ] > . [
        -               ]               >               ]
       ] +             < <             < [             - [
      -   >           +   <           ]   +           >   [
     - < + >         > > - [         - > + <         ] + + >
    [       -       <       -       >       ]       <       <
   < ]     < <     < <     ] +     + +     + +     + +     + +
  +   .   +   +   +   .   [   -   ]   <   ]   +   +   +   +   +
 * * * * * M a d e * B y : * N Y Y R I K K I * 2 0 0 2 * * * * *
"""

In [40]:
%%time
result = bf(triangle)


CPU times: user 327 ms, sys: 0 ns, total: 327 ms
Wall time: 326 ms

In [18]:
print(result.read())


                                *    
                               * *    
                              *   *    
                             * * * *    
                            *       *    
                           * *     * *    
                          *   *   *   *    
                         * * * * * * * *    
                        *               *    
                       * *             * *    
                      *   *           *   *    
                     * * * *         * * * *    
                    *       *       *       *    
                   * *     * *     * *     * *    
                  *   *   *   *   *   *   *   *    
                 * * * * * * * * * * * * * * * *    
                *                               *    
               * *                             * *    
              *   *                           *   *    
             * * * *                         * * * *    
            *       *                       *       *    
           * *     * *                     * *     * *    
          *   *   *   *                   *   *   *   *    
         * * * * * * * *                 * * * * * * * *    
        *               *               *               *    
       * *             * *             * *             * *    
      *   *           *   *           *   *           *   *    
     * * * *         * * * *         * * * *         * * * *    
    *       *       *       *       *       *       *       *    
   * *     * *     * *     * *     * *     * *     * *     * *    
  *   *   *   *   *   *   *   *   *   *   *   *   *   *   *   *    
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *    


In [211]:
ZtoA = """>++[<+++++++++++++>-]<[[>+>+<<-]>[<+>-]++++++++
[>++++++++<-]>.[-]<<>++++++++++[>++++++++++[>++
++++++++[>++++++++++[>++++++++++[>++++++++++[>+
+++++++++[-]<-]<-]<-]<-]<-]<-]<-]++++++++++."""

In [182]:
%%time
print(bf(ZtoA).read())


ZYXWVUTSRQPONMLKJIHGFEDCBA

CPU times: user 38min 33s, sys: 142 ms, total: 38min 33s
Wall time: 38min 34s

Where are we spending time?

[ I-1 ] 26_000_000
[ M1 I10 [ I-1 ] M-1 I-1 ] -> 2_600_000
[ M1 I10 [ M1 I10 [ I-1 ] M-1 I-1 ] M-1 I-1 ] -> 260_000
[ M1 I10 [ M1 I10 [ M1 I10 [ I-1 ] M-1 I-1 ] M-1 I-1 ] M-1 I-1 ] -> 26_000
[ M1 I10 [ M1 I10 [ M1 I10 [ M1 I10 [ I-1 ] M-1 I-1 ] M-1 I-1 ] M-1 I-1 ] M-1 I-1 ] -> 2_600
[ M1 I10 [ M1 I10 [ M1 I10 [ M1 I10 [ M1 I10 [ I-1 ] M-1 I-1 ] M-1 I-1 ] M-1 I-1 ] M-1 I-1 ] M-1 I-1 ] -> 260

Idea

Transform [-] into memory[memptr] = 0


In [30]:
def handle_clear(tokens: List[BFToken]) -> List[BFToken]:
    program: List[BFToken] = []
    clear = [BFToken.JumpF, BFToken.Decr, BFToken.JumpB]
    
    for token in tokens:
        program.append(token)
        
        if len(program) < 3:
            continue
        
        last_three = program[-3:]
        
        if last_three == clear:
            program[-3:] = [BFToken.ZeroOut]
    
    return program

38min 34s

  • Python isn't known for being fast
  • Cython, numba, etc can help
  • but...

Rust

🎺🎺🎺

insert hype here

But seriously

  • Opt-in mutability
  • Algebraic Data Types
  • Functional + Imperative
  • High level but fast

Representation

enum BrainFuckToken {
    Move(isize),
    JumpF(usize),
    JumpB(usize),
    Incr(i32)
    StdIn,
    StdOut,
    ZeroOut
}

Parsing

impl BrainFuckToken {
  pub fn from_char(c: char) -> Option<BrainFuckToken> {
      match c {
          '+' => Some(BrainFuckToken::Incr(1)),
          '-' => Some(BrainFuckToken::Incr(-1)),
          '>' => Some(BrainFuckToken::Move(1)),
          '<' => Some(BrainFuckToken::Move(-1)),
          '.' => Some(BrainFuckToken::StdOut),
          ',' => Some(BrainFuckToken::StdIn),
          '[' => Some(BrainFuckToken::JumpF(0)),
          ']' => Some(BrainFuckToken::JumpB(0)),
          _ => None,
      }
  }
}

Jumps

fn build_jumps(tokens: &mut Vec<BrainFuckToken>) {
  let mut brackets = Vec::new();

  for idx in 0..tokens.len() {
      match tokens[idx] {
          BrainFuckToken::JumpF(_) => brackets.push(idx),
          BrainFuckToken::JumpB(_) => {
              let partner = brackets
                  .pop()
                  .unwrap_or_else(|| panic!("unmatched bracket at {}", idx));
              mem::replace(&mut tokens[idx], BrainFuckToken::JumpB(partner));
              mem::replace(&mut tokens[partner], BrainFuckToken::JumpF(idx));
          }
          _ => {}
      }
  }

  if brackets.len() != 0 {
      panic!("Unmatched brackets at: {:?}", brackets);
  }
}

Run loop

while let Some(instr) = self.ops.get(self.loc) {
  match *instr {
      BrainFuckToken::JumpF(x) => {
          if self.tape.get() == 0 {
              self.loc = x;
          } else {
              self.tracer.trace((self.loc, x));
          }
      }
      BrainFuckToken::JumpB(x) => {
          if self.tape.get() != 0 {
              self.loc = x;
          }
      }
      BrainFuckToken::Move(x) => self.tape.move_(x),
      BrainFuckToken::Incr(x) => self.tape.incr(x),
      BrainFuckToken::StdIn => self.tape.putc(input_iter.next().unwrap_or('\0')),
      BrainFuckToken::StdOut => out.push(self.tape.getc()).
      BrainFuckToken::ZeroOut => self.tape.put(0),
  }
  self.loc += 1;
}

But how fast?


In [36]:
%%bash
time ./bf triangle.bf > /dev/null


real	0m0.005s
user	0m0.000s
sys	0m0.004s

In [37]:
%%bash
time ./bf ZtoA.bf > /dev/null


real	0m0.527s
user	0m0.522s
sys	0m0.004s

In [38]:
%%bash
time ./bf mandel.bf > /dev/null


real	0m11.970s
user	0m11.904s
sys	0m0.008s

Questions?