In [1]:
import pandas as pd
from cStringIO import StringIO
import random
from decimal import Decimal
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
def xstrtod(s):
    """Parse ``s`` as header-less CSV text with pandas and return its first cell.

    The text is wrapped in an in-memory buffer, read with ``pd.read_csv``
    (``header=None``), and the value at column 0, row 0 is returned.
    """
    buf = StringIO(s)
    frame = pd.read_csv(buf, header=None)
    first_column = frame[0]
    return first_column[0]

In [3]:
def randnum(length):
    """Return the string ``'1.'`` followed by ``length`` random decimal digits.

    Each digit is drawn independently with ``random.choice`` from
    ``'0123456789'``; a ``length`` of 0 yields just ``'1.'``.
    """
    digits = [random.choice('0123456789') for _ in range(length)]
    return '1.' + ''.join(digits)

In [4]:
def ulp(s):
    """Return one unit in the last place (ULP) at the double nearest to ``s``.

    ``s`` is converted to the ``np.float64`` value ``f``. The result is the
    gap between ``f`` and the adjacent double that lies on the same side of
    ``f`` as the exact decimal value of ``s``. When ``s`` is exactly
    representable as a double, the gap to the next double towards +infinity
    is returned instead, so the result is non-zero for finite input.

    Parameters
    ----------
    s : str
        Decimal literal accepted by both ``Decimal`` and ``np.float64``.

    Returns
    -------
    np.float64
        Non-negative distance between ``f`` and the chosen neighbour.
    """
    num = Decimal(s)
    f = np.float64(s)
    # Step towards the true decimal value. An exactly representable input has
    # no preferred side, so step upwards; leaving ``f`` unchanged in that case
    # would return 0.0 and break callers that divide by the result.
    direction = -np.inf if Decimal(float(f)) > num else np.inf
    # nextafter moves by exactly one representable double, so there is no need
    # to edit the raw bytes (which depended on byte order and had to special
    # case a wrapping low byte).
    f2 = np.nextafter(f, direction)
    return abs(f2 - f)

In [5]:
def test_conversion(sig_figs, num_trials=1000):
    """Measure the parsing error of ``xstrtod`` on random decimal strings.

    For each trial a string is produced by ``randnum(sig_figs)``, parsed with
    ``xstrtod``, and the absolute difference between the exact decimal value
    and the parsed value is expressed both directly and in units of
    ``ulp(val)``.

    Parameters
    ----------
    sig_figs : int
        Number of random digits passed to ``randnum``.
    num_trials : int, optional
        How many random strings to test (default 1000, the value that was
        previously hard-coded).

    Returns
    -------
    tuple
        ``(great_vals, good_vals, y, diffs)`` where ``great_vals`` counts
        trials with error strictly below 0.5 ULP, ``good_vals`` counts trials
        with error strictly below 1.0 ULP, ``y`` lists the per-trial error in
        ULP as floats and ``diffs`` lists the per-trial absolute decimal
        error as floats.
    """
    y = []
    diffs = []
    great_vals = 0
    good_vals = 0

    for _ in range(num_trials):
        val = randnum(sig_figs)
        guess = xstrtod(val)
        decimal_diff = abs(Decimal(val) - Decimal(guess))
        # Convert once; the same float is stored and compared against both
        # thresholds.
        ulp_diff = float(decimal_diff / Decimal(ulp(val)))
        y.append(ulp_diff)
        diffs.append(float(decimal_diff))
        if ulp_diff < 1.0:
            good_vals += 1
        if ulp_diff < 0.5:
            great_vals += 1

    return (great_vals, good_vals, y, diffs)

In [6]:
def graph_results(sig_figs):
    """Run ``test_conversion(sig_figs)`` and report the results.

    Prints the percentage of trials below 0.5 ULP and below 1.0 ULP, then
    shows two log-scaled histograms: the error in ULP and the absolute
    decimal error.

    Parameters
    ----------
    sig_figs : int
        Forwarded unchanged to ``test_conversion``.
    """
    great_vals, good_vals, y, diffs = test_conversion(sig_figs)
    # Derive the denominator from the data actually returned rather than
    # repeating the trial count used inside test_conversion.
    total = len(y)
    print('{0}% of values within 0.5 ULP'.format(great_vals * 100.0 / total))
    print('{0}% of values within 1.0 ULP'.format(good_vals * 100.0 / total))
    plt.hist(y, bins=30, log=True)
    plt.xlabel('Conversion error in ULP')
    plt.ylabel('Logarithm of frequency')
    plt.show()
    plt.hist(diffs, bins=50, log=True)
    plt.xlabel('Decimal conversion error')
    plt.ylabel('Logarithm of frequency')
    plt.show()

In [7]:
# Run the accuracy experiment on strings with 5 random digits after "1.".
graph_results(5)


100.0% of values within 0.5 ULP
100.0% of values within 1.0 ULP

In [8]:
# Run the accuracy experiment on strings with 10 random digits after "1.".
graph_results(10)


100.0% of values within 0.5 ULP
100.0% of values within 1.0 ULP

In [9]:
# Run the accuracy experiment on strings with 15 random digits after "1.".
graph_results(15)


100.0% of values within 0.5 ULP
100.0% of values within 1.0 ULP

In [10]:
# Run the accuracy experiment on strings with 20 random digits after "1.".
graph_results(20)


59.8% of values within 0.5 ULP
91.6% of values within 1.0 ULP

In [11]:
# Run the accuracy experiment on strings with 45 random digits after "1.".
graph_results(45)


57.8% of values within 0.5 ULP
90.4% of values within 1.0 ULP

In [12]:
# Run the accuracy experiment on strings with 100 random digits after "1.".
graph_results(100)


61.0% of values within 0.5 ULP
90.6% of values within 1.0 ULP