In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import random
import timeit
import numpy as np

In [2]:
## BUBBLESORT(A)
def bubbleSort(A):
    # 1 for i = 1 to A.length - 1
    for i in range(len(A) - 1):
        # 2 for j = A.length downto i + 1
        for j in range(len(A) - 1, i, -1):
            # 3 if A[j] < A[j - 1]
            if (A[j] < A[j - 1]):
                # 4 exchange A[j] with A[j - 1]
                A[j], A[j - 1] = A[j - 1], A[j]

In [3]:
## INSERTION-SORT(A)
def insertSort(A):
    # 1 for j = 2 to A.length
    for j in range(len(A)):
        # 2 key = A[j]
        temp = A[j]
        # 3 // Insert A[j] into the sorted sequence A[1 .. j - 1].
        # 4 i = j - 1
        i = j - 1
        # 5 while i > 0 and A[i] > key
        while ((i > 0) & (A[i] > temp)):
            # 6 A[i + 1] = A[i]
            A[i + 1] = A[i]
            # 7 i = i - 1
            i = i - 1
            # 8 A[i + 1] = key
            A[i + 1] = temp

In [4]:
# This is just a wrapper function for the timings
def wrapper(func, *args, **kwargs):
    def wrap():
        return func(*args, **kwargs)
    return wrap

In [ ]:
#### For plot 1
## Create 3 lists of 10 sublists of n random numbers to avoid skewing timing:
listsBubble5001 = [[] for _ in range(25)]
listsInsert5001 = [[] for _ in range(25)]

n = 0
for i in range(25):
    n += 2000 
    listsBubble5001[i] = random.sample(range(1, 200001), n)
    listsInsert5001[i] = random.sample(range(1, 200001), n)
    
listLen = [len(listsBubble5001[i]) for i in range(len(listsBubble5001))]

In [ ]:
# Create empty lists to store timings
# bubbleSort timings
bubble5001 = []
insert5001 = []

# Loop through and time the 25 random lists * 10 times for each of the three lengths
for i in range(len(listsBubble5001)):
    bubble5001.append(timeit.timeit(wrapper(bubbleSort, listsBubble5001[i]), number=10))
    insert5001.append(timeit.timeit(wrapper(insertSort, listsInsert5001[i]), number=10))
    
bubble1 = [np.mean(bubble5001)]
insert1 = [np.mean(insert5001)]

In [ ]:
graph = pd.DataFrame({'n': listLen, 
                      'bubble time (random)': bubble5001,
                      'insert time (random)': insert5001})
graph = graph[['n', 'bubble time (random)', 'insert time (random)']]

In [ ]:
# red dashes, blue squares and green triangles
plt.plot(graph['n'], graph['bubble time (random)'], 'r--')
plt.plot(graph['n'], graph['insert time (random)'], 'b--')
plt.show()

In [ ]:
#### Plot 2
## Create 3 lists of 10 sublists of n random numbers to avoid skewing timing:
listsBubble5002 = [[] for _ in range(25)]
listsInsert5002 = [[] for _ in range(25)]

# Create data, store in list, and sort in increasing order
n = 0
for i in range(25):
    n += 2000 
    listsBubble5002[i] = np.sort(random.sample(range(1, 200001), n), axis=None)
    listsInsert5002[i] = np.sort(random.sample(range(1, 200001), n), axis=None)

# Create empty lists to store timings
# bubbleSort timings
bubble5002 = []
insert5002 = []

# Loop through and time the 25 sorted random lists * 1 times
for i in range(len(lists5002)):
    bubble5002.append(timeit.timeit(wrapper(bubbleSort, listsBubble5002[i]), number=1))
    insert5002.append(timeit.timeit(wrapper(insertSort, listsInsert5002[i]), number=1))

In [ ]:
# Add results to dataframe
graph['bubble time (random asc.)'] = bubble5002
graph['insert time (random asc.)'] = insert5002

In [ ]:
plt.plot(graph['n'], graph['bubble time (random asc.)'], 'r--')
plt.plot(graph['n'], graph['insert time (random asc.)'], 'b--')
plt.show()

In [ ]:
#### Plot 3
## Create a list of 25 sublists of n random numbers to avoid skewing timing:
listsBubble5003 = [[] for _ in range(25)]
listsInsert5003 = [[] for _ in range(25)]

# Create data, store in list, and sort in increasing order
n = 0
for i in range(25):
    n += 2000 
    listsBubble5003[i] = np.sort(random.sample(range(1, 200001), n), axis=None)
    listsInsert5003[i] = np.sort(random.sample(range(1, 200001), n), axis=None)

for i in range(25):
    listsBubble5003[i][::-1].sort()
    listsInsert5003[i][::-1].sort()
    
# Create empty lists to store timings
# bubbleSort timings
bubble5003 = []
insert5003 = []

# Loop through and time the 25 sorted random lists * 1 times
for i in range(len(lists5003)):
    bubble5003.append(timeit.timeit(wrapper(bubbleSort, listsBubble5003[i]), number=1))
    insert5003.append(timeit.timeit(wrapper(insertSort, listsInsert5003[i]), number=1))

In [ ]:
# Add results to dataframe
graph['bubble time (random desc.)'] = bubble5003
graph['insert time (random desc.)'] = insert5003
graph

In [ ]:
plt.plot(graph['n'], graph['bubble time (random desc.)'], 'r--')
plt.plot(graph['n'], graph['insert time (random desc.)'], 'b--')
plt.show()

Explain Your Choices:

  • Explain any platform/language choices that you made for your code/plots.
    • I chose python since I am most comfortable in python. For the code, I adapted the psuedo code from the book and used that psuedo code as comments in-line to highlight how I adapted the steps.
  • How did you create/store your data that you used to make the plots?
    • I used a for loop and numpy's random sample function to create a list with 25 randomly sampled sublists of size with increasing n from 2000 - 50000 for plot 1. For plots 2 and 3 I used similar approach but sorted in ascending and descending order, respectively to meet the constraints instructed. I took the 10 timings for each of the 25 sets of data for plot 1 and plotted in matplotlib. I only took one timing for the purposes of plots 2 & 3 per the faq posted on piazza: https://piazza.com/class/j63o8j0x6m3123?cid=18
    • For these graphs, Red represents the bubbleSort algorithm timing; whereas blue represents the insertSort algorithm.
  • If you ran into any special difficulties or made any interesting observations, feel free to mention them here.
    • I originally ran into some difficulty with the nested for loop with bubble sort (step 2) and getting it to iterate through the entire list but eventually got it work.

Conclusions:

  • These two algorithms are supposed to have quadratic running time.
  • Does the first plot reflect this?
  • How do the two algorithms compare in terms of running time?
  • How about the second plot?
  • Do you think this one is quadratic? Why do you think it looks the way it does?
  • How does the third compare to the first and the second?
  • What kind of functions do you think you’re observing in the three plots (linear, logarithmic, quadratic, exponential, etc etc)?
  • Would you use these algorithms for real life data, and why?

In [ ]: