# Median Maintenance

The goal of this problem is to implement the "Median Maintenance" algorithm (covered in the Week 3 lecture on heap applications). The text file contains a list of the integers from 1 to 10000 in unsorted order; you should treat this as a stream of numbers, arriving one by one. Letting xi denote the ith number of the file, the kth median mk is defined as the median of the numbers x1,…,xk. (So, if k is odd, then mk is ((k+1)/2)th smallest number among x1,…,xk; if k is even, then mk is the (k/2)th smallest number among x1,…,xk.)

In the box below you should type the sum of these 10000 medians, modulo 10000 (i.e., only the last 4 digits). That is, you should compute (m1+m2+m3+⋯+m10000)mod10000.

OPTIONAL EXERCISE: Compare the performance achieved by heap-based and search-tree-based implementations of the algorithm.

``````

In [14]:

# heap structure. a binary tree where the key values of children is larger than the key value of the parent
class myHeapArray(object):

def __init__(self, minheap=True):
self.heap = []
self.minheap = minheap

def bubbleDown(self):

n = len(self.heap)-1
p = 0

while (2*p <= n):
c1 = 2*p

if 2*p + 1 <= n:
c2 = 2*p + 1
# Second child exists

cdt = (self.heap[c1] < self.heap[c2]) if self.minheap else (self.heap[c1] > self.heap[c2])
if cdt:
c_val, c = (self.heap[c1], c1)
else:
c_val, c = (self.heap[c2], c2)
else:
c_val, c = (self.heap[c1], c1)

# Swap parent with the child with the smallest key value
cdt = (self.heap[p] >  c_val) if self.minheap else (self.heap[p] <  c_val)
if cdt:
tmp = self.heap[p]
self.heap[p] = c_val
self.heap[c] = tmp
p = c
else:
break

def extractMin(self):
if not self.heap:
return None

val = self.heap[0]

# copy last value to root, then remove and discard.
last = len(self.heap) - 1
self.heap[0] = self.heap[last]
self.heap.pop()

self.bubbleDown()

return val

def bubbleUp(self):
# for parent at node i, children is at node 2*i and 2*i + 1
# for child i, parent is at i/2 if i is even or floor(i/2) if i is odd

# Bubble Up starting the newly added key at the end of the heap

c = len(self.heap) - 1
while c!= 0:
p = int(c / 2)

cdt = (self.heap[p] > self.heap[c]) if self.minheap else (self.heap[p] < self.heap[c])
if cdt:
tmp = self.heap[p]
self.heap[p] = self.heap[c]
self.heap[c] = tmp
c = p
else:
break

def insert(self,elem):
self.heap.append(elem)

self.bubbleUp()

def insertList(self, elemList):
for elem in elemList:
self.heap.append(elem)
self.bubbleUp()

def get_ordered_list(self):
ordered = []

while True:
n = self.extractMin()
if n:
ordered.append(n)
else:
break

return ordered

``````
``````

In [17]:

arr = [1 , 10, 9, 15, 5]

elem = [str(i) for i in arr]

h = myHeapArray(minheap=True)
h.insertList(arr)

print (h.get_ordered_list())

h = myHeapArray(minheap=False)
h.insertList(arr)

print (h.get_ordered_list())

``````
``````

[1, 5, 9, 10, 15]
[15, 10, 9, 5, 1]

``````
``````

In [39]:

def median_maintenance(stream):
lower_heap  = myHeapArray(minheap=False) # maintain max value
upper_heap  = myHeapArray(minheap=True)

median_list = []

for i in stream:

if not lower_heap.heap:
# Should run only once
lower_heap.insert(i)
median_list.append(i)
print ("median:", i)
continue

minMedian = lower_heap.heap[0]

if not upper_heap.heap:
# Should run only once
if i >=  minMedian:
upper_heap.insert(i)
median_list.append(minMedian)
print ("median:", minMedian)

else:
v = lower_heap.extractMin()
upper_heap.insert(v)
lower_heap.insert(i)
median_list.append(i)
print ("median:", i)
continue

maxMedian = upper_heap.heap[0]

if i >= maxMedian:
upper_heap.insert(i)
else:
lower_heap.insert(i)

# Rebalance heaps if needed
if len(lower_heap.heap) > len(upper_heap.heap) + 1:
v = lower_heap.extractMin()
upper_heap.insert(v)
elif len(upper_heap.heap) > len(lower_heap.heap) + 1:
v = upper_heap.extractMin()
lower_heap.insert(v)

# Get Median
if (len(lower_heap.heap) == len(upper_heap.heap)) or (len(lower_heap.heap) > len(upper_heap.heap)):
median = lower_heap.heap[0]
else:
median = upper_heap.heap[0]

#print ("median", median)
median_list.append(median)

return median_list

def getdataStream(FILE):
fp = open(FILE, "r")

yield int(i)

FILE = "median.txt"

median_list = median_maintenance(getdataStream(FILE))

print (sum(median_list)%10000)

``````
``````

median: 6331
median: 2793
1213

``````
``````

In [ ]:

``````