In [1]:
import numpy as np
from random import random
import glob
import os
import sys

from astropy.table import Table

import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
%matplotlib inline

In [3]:
sizes = [50, 500, 1000, 2000, 5000, 10000, 50000, 100000, 500000, 1000000, 5000000]

In [120]:
def write(sequence, name):
    with open(name, 'w') as out:
        for val in sequence:
            out.write("{}\n".format(val))    

def gen_rand(size, name):
    data = np.arange(size)
    np.random.shuffle(data)
    write(data, name)

def gen_rev(size, name):
    write(np.arange(size)[::-1], name)
    
def gen_order(size, name):
    write(np.arange(size), name)
    
def gen_repeat(size, name):
    write(np.tile(np.arange(size/10), 10).astype(int), name)

In [121]:
for size in sizes:
    gen_rand(size, "../input/random_{}.txt".format(size))

    gen_rev(size, "../input/reverse_{}.txt".format(size))
    
    gen_order(size, "../input/order_{}.txt".format(size))
    
    gen_repeat(size, "../input/fewunique_{}.txt".format(size))

Analyze times


In [3]:
def parse_file(filename):
    sizes = []
    times = []
    order = []
    
    for line in open(filename).readlines():
        if line.strip().endswith('calculate'):
            line = line.strip().split(' ')
            order.append(line[2].split('/')[-1].split('_')[0])
            sizes.append(int(line[4]))
            times.append(float(line[7][:-2])/ 10**9)
    
    return order, sizes,times

In [4]:
cat insertion_timing.txt


#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/fewunique_1000000.txt
  Found 1000000 items to be sorted.
  insertionsort on ../input/fewunique_1000000.txt with 1000000 items took 256994102673ns to calculate
  Writing output to file: ../output/fewunique_1000000.txt
#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/fewunique_100000.txt
  Found 100000 items to be sorted.
  insertionsort on ../input/fewunique_100000.txt with 100000 items took 794128109ns to calculate
  Writing output to file: ../output/fewunique_100000.txt
#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/fewunique_10000.txt
  Found 10000 items to be sorted.
  insertionsort on ../input/fewunique_10000.txt with 10000 items took 25703089ns to calculate
  Writing output to file: ../output/fewunique_10000.txt
#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/fewunique_1000.txt
  Found 1000 items to be sorted.
  insertionsort on ../input/fewunique_1000.txt with 1000 items took 3206121ns to calculate
  Writing output to file: ../output/fewunique_1000.txt
#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/fewunique_2000.txt
  Found 2000 items to be sorted.
  insertionsort on ../input/fewunique_2000.txt with 2000 items took 3674129ns to calculate
  Writing output to file: ../output/fewunique_2000.txt
#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/fewunique_50000.txt
  Found 50000 items to be sorted.
  insertionsort on ../input/fewunique_50000.txt with 50000 items took 201452630ns to calculate
  Writing output to file: ../output/fewunique_50000.txt
#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/fewunique_5000.txt
  Found 5000 items to be sorted.
  insertionsort on ../input/fewunique_5000.txt with 5000 items took 6440671ns to calculate
  Writing output to file: ../output/fewunique_5000.txt
#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/fewunique_500.txt
  Found 500 items to be sorted.
  insertionsort on ../input/fewunique_500.txt with 500 items took 1276341ns to calculate
  Writing output to file: ../output/fewunique_500.txt
#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/fewunique_50.txt
  Found 50 items to be sorted.
  insertionsort on ../input/fewunique_50.txt with 50 items took 206868ns to calculate
  Writing output to file: ../output/fewunique_50.txt
#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/order_1000000.txt
  Found 1000000 items to be sorted.
  insertionsort on ../input/order_1000000.txt with 1000000 items took 4226979ns to calculate
  Writing output to file: ../output/order_1000000.txt
#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/order_100000.txt
  Found 100000 items to be sorted.
  insertionsort on ../input/order_100000.txt with 100000 items took 1756505ns to calculate
  Writing output to file: ../output/order_100000.txt
#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/order_10000.txt
  Found 10000 items to be sorted.
  insertionsort on ../input/order_10000.txt with 10000 items took 1432131ns to calculate
  Writing output to file: ../output/order_10000.txt
#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/order_1000.txt
  Found 1000 items to be sorted.
  insertionsort on ../input/order_1000.txt with 1000 items took 305396ns to calculate
  Writing output to file: ../output/order_1000.txt
#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/order_2000.txt
  Found 2000 items to be sorted.
  insertionsort on ../input/order_2000.txt with 2000 items took 311948ns to calculate
  Writing output to file: ../output/order_2000.txt
#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/order_50000.txt
  Found 50000 items to be sorted.
  insertionsort on ../input/order_50000.txt with 50000 items took 1532315ns to calculate
  Writing output to file: ../output/order_50000.txt
#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/order_5000.txt
  Found 5000 items to be sorted.
  insertionsort on ../input/order_5000.txt with 5000 items took 405471ns to calculate
  Writing output to file: ../output/order_5000.txt
#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/order_500.txt
  Found 500 items to be sorted.
  insertionsort on ../input/order_500.txt with 500 items took 272799ns to calculate
  Writing output to file: ../output/order_500.txt
#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/order_50.txt
  Found 50 items to be sorted.
  insertionsort on ../input/order_50.txt with 50 items took 298499ns to calculate
  Writing output to file: ../output/order_50.txt
#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/random_1000000.txt
  Found 1000000 items to be sorted.
  insertionsort on ../input/random_1000000.txt with 1000000 items took 81490019101ns to calculate
  Writing output to file: ../output/random_1000000.txt
#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/random_100000.txt
  Found 100000 items to be sorted.
  insertionsort on ../input/random_100000.txt with 100000 items took 812087353ns to calculate
  Writing output to file: ../output/random_100000.txt
#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/random_10000.txt
  Found 10000 items to be sorted.
  insertionsort on ../input/random_10000.txt with 10000 items took 25110896ns to calculate
  Writing output to file: ../output/random_10000.txt
#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/random_1000.txt
  Found 1000 items to be sorted.
  insertionsort on ../input/random_1000.txt with 1000 items took 3326566ns to calculate
  Writing output to file: ../output/random_1000.txt
#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/random_2000.txt
  Found 2000 items to be sorted.
  insertionsort on ../input/random_2000.txt with 2000 items took 3891139ns to calculate
  Writing output to file: ../output/random_2000.txt
#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/random_50000.txt
  Found 50000 items to be sorted.
  insertionsort on ../input/random_50000.txt with 50000 items took 194280818ns to calculate
  Writing output to file: ../output/random_50000.txt
#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/random_5000.txt
  Found 5000 items to be sorted.
  insertionsort on ../input/random_5000.txt with 5000 items took 6121951ns to calculate
  Writing output to file: ../output/random_5000.txt
#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/random_500.txt
  Found 500 items to be sorted.
  insertionsort on ../input/random_500.txt with 500 items took 1350264ns to calculate
  Writing output to file: ../output/random_500.txt
#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/random_50.txt
  Found 50 items to be sorted.
  insertionsort on ../input/random_50.txt with 50 items took 207447ns to calculate
  Writing output to file: ../output/random_50.txt
#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/reverse_1000000.txt
  Found 1000000 items to be sorted.
  insertionsort on ../input/reverse_1000000.txt with 1000000 items took 155514205332ns to calculate
  Writing output to file: ../output/reverse_1000000.txt
#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/reverse_100000.txt
  Found 100000 items to be sorted.
  insertionsort on ../input/reverse_100000.txt with 100000 items took 1511342887ns to calculate
  Writing output to file: ../output/reverse_100000.txt
#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/reverse_10000.txt
  Found 10000 items to be sorted.
  insertionsort on ../input/reverse_10000.txt with 10000 items took 32003954ns to calculate
  Writing output to file: ../output/reverse_10000.txt
#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/reverse_1000.txt
  Found 1000 items to be sorted.
  insertionsort on ../input/reverse_1000.txt with 1000 items took 2956343ns to calculate
  Writing output to file: ../output/reverse_1000.txt
#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/reverse_2000.txt
  Found 2000 items to be sorted.
  insertionsort on ../input/reverse_2000.txt with 2000 items took 4887875ns to calculate
  Writing output to file: ../output/reverse_2000.txt
#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/reverse_50000.txt
  Found 50000 items to be sorted.
  insertionsort on ../input/reverse_50000.txt with 50000 items took 380363184ns to calculate
  Writing output to file: ../output/reverse_50000.txt
#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/reverse_5000.txt
  Found 5000 items to be sorted.
  insertionsort on ../input/reverse_5000.txt with 5000 items took 7711163ns to calculate
  Writing output to file: ../output/reverse_5000.txt
#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/reverse_500.txt
  Found 500 items to be sorted.
  insertionsort on ../input/reverse_500.txt with 500 items took 2470036ns to calculate
  Writing output to file: ../output/reverse_500.txt
#--------------------------#
#-- Starting file sorter --#
#--------------------------#
  Reading file ../input/reverse_50.txt
  Found 50 items to be sorted.
  insertionsort on ../input/reverse_50.txt with 50 items took 219922ns to calculate
  Writing output to file: ../output/reverse_50.txt

In [12]:
def create_table(file_list):
    data = []
    for infile in file_list:
        method = os.path.split(infile)[-1].split('_')[0]
        order, sizes, times = parse_file(infile)
        
        for (o, s, t) in zip(order, sizes, times):
            data.append((method, o, s, round(t, 6)))
            
    t = Table(rows=data, names=('method', 'order', 'size', 'time'))
    
    return t

In [13]:
info = create_table(glob.glob('*timing.txt'))

In [14]:
fig = plt.figure(figsize=(10, 20))

methods = sorted(list(set(info['method'])))

for i, method in enumerate(methods):
    print(method)
    ax = fig.add_subplot(len(methods), 1, i+1)
    
    index = np.where(info['method'] == method)[0]
    orders = sorted(list(set(info['order'][index])))
    
    #sindex = np.argsort(info['size'][index])
    #info[index][sindex].write(open('{}.tex'.format(method), 'w'), format='latex')
    
    for order in orders:
        index = np.where((info['method'] == method) &
                         (info['order'] == order))[0]
        
        ax.loglog(info['size'][index], 
                  info['time'][index], 
                  label=order, 
                  marker='o', 
                  ls='', 
                  ms=10,
                  alpha=.9)
        
        
    ax.legend(shadow=True, numpoints=1, loc='upper left')
    ax.set_title(method)
    ax.set_xlabel("Log File Size")
    ax.set_ylabel("Log Sorting Time (s)")
    ax.set_xlim(0, info['size'].max()*1.2)
    
fig.tight_layout()

fig.savefig('sorting_efficiency.pdf')


heap
heaprecursive
insertion
quick
quick100
quick50
quickmed

In [15]:
orderings = list(set(info['order']))
methods = sorted(list(set(info['method'])))
sizes = sorted(list(set(info['size'])))

In [16]:
for o in orderings:
    data = []
    for s in sizes:
        index = np.where((info['order'] == o ) &
                         (info['size'] == s))[0]
        data.append([s] + [info[index]['time'][info[index]['method'] == m].item() for m in methods])
        
    t = Table(rows=data, names=['size'] + methods)

    t.write('{}.tex'.format(o), format='latex')

In [17]:
t


Out[17]:
<Table length=9>
sizeheapheaprecursiveinsertionquickquick100quick50quickmed
int64float64float64float64float64float64float64float64
500.0002630.000250.000220.0002370.0003740.0003910.000374
5000.0006150.0007670.002470.0016970.0018380.0018060.000587
10000.0009490.0013740.0029560.0026940.002970.0029930.000765
20000.0020020.002910.0048880.004950.005060.0049150.001029
50000.0029150.0049760.0077110.0086810.0089950.0088880.002771
100000.0132010.0163630.0320040.0327410.0344950.0335540.005503
500000.0108240.0098570.3803630.4672970.4536850.4426810.006533
1000000.0142680.0130281.5113431.8087531.8134931.8091450.00865
10000000.0716080.06512155.514205181.413635183.928399194.7869480.034295

In [21]:
methods


Out[21]:
['heap',
 'heaprecursive',
 'insertion',
 'quick',
 'quick100',
 'quick50',
 'quickmed']

In [185]:
info[index]['time'][info[index]['method'] == 'heap'].item()


Out[185]:
0.100491692

In [ ]: