In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
%matplotlib inline

In [3]:
def selective_median_filter(data, kernel=31, threshold=2):
    """Replace outliers in a 1-D sequence with their local window median.

    For each index ``i`` a window of up to ``kernel`` values centred on
    ``i`` is taken (the window is truncated at both ends of the sequence).
    The value at ``i`` is treated as an outlier, and replaced by the window
    median, when it lies more than ``threshold`` standard deviations of
    that window (as computed by ``np.std``) away from the window median.

    Note: windows are read from the output list while it is being built,
    so a value replaced earlier in the pass is seen in its replaced form
    by the windows of later positions.

    Parameters
    ----------
    data : sequence of numbers
        Input values (anything supporting ``len`` and iteration, e.g. a
        list or a 1-D array). It is copied, not modified.
    kernel : int, optional
        Window width; must be a positive odd integer.
    threshold : number, optional
        Number of window standard deviations beyond which a value is
        considered an outlier.

    Returns
    -------
    list
        A new list of the same length as ``data``. Replaced entries hold
        the value returned by ``np.median``; all others are unchanged.

    Raises
    ------
    ValueError
        If ``kernel`` is even or smaller than 1.
    """
    if kernel < 1:
        raise ValueError("Kernel needs to be a positive odd integer.")
    if kernel % 2 == 0:
        raise ValueError("Kernel needs to be odd.")

    # Floor division keeps the half-width an integer. With a plain `/`
    # Python 3 produces a float, which is not a valid slice index.
    half = kernel // 2
    n = len(data)
    res = list(data)
    for i in range(n):
        # Clamp the window to the bounds of the sequence.
        seg = res[max(0, i - half):min(n, i + half + 1)]
        med = np.median(seg)
        if abs(res[i] - med) > threshold * np.std(seg):
            res[i] = med
    return res

In [6]:
# Generate random data; the seed is fixed so a fresh run of the notebook
# reproduces the same values.
np.random.seed(0)
data = np.random.poisson(5, 100)

# Set a couple of values as 'outliers'
data[45] = 1000
data[89] = 670

# Plot the data (x axis is the 1-based sample position)
p = plt.scatter(range(1, len(data) + 1), data)
# `show` is only available through the `plt` namespace imported above;
# a bare `show(...)` raises NameError on a fresh kernel.
plt.show()



In [7]:
# Filter the series with a 7-sample window and a cutoff of 2 standard
# deviations; `data` is rebound to the filtered list.
data = selective_median_filter(data, kernel=7, threshold=2)

In [8]:
# Plot the filtered data (x axis is the 1-based sample position)
s = plt.scatter(range(1, len(data) + 1), data)
# Use the pyplot namespace: a bare `show(...)` is not defined by the
# imports in this notebook.
plt.show()



In [9]:
# Inspect the two positions that were overwritten with artificial outliers
# (indices 45 and 89) to see the values they hold after filtering.
print(data[45], data[89])


(5.0, 7.0)