In [2]:
# load the libraries
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from sklearn.grid_search import GridSearchCV
from sklearn.kernel_ridge import KernelRidge
import urllib2
In [3]:
# read the data. This is just x/y data in this tab separated file
x = []
y = []
data = urllib2.urlopen("https://edwards.sdsu.edu/~redwards/coverage.tsv")
for l in data:
p=l.strip().split("\t")
x.append(float(p[0]))
y.append(float(p[1]))
# create numpy arrays for the kde work
ny = np.array(y)
nx = np.array(x)
print("X range: {} : {}\nY range: {} : {}".format(min(x), max(x), min(y), max(y)))
In [ ]:
grid = GridSearchCV(
KernelRidge(kernel='rbf', gamma=1e-4),
param_grid={"alpha": [0.1, 0.01, 0.001], "gamma": np.logspace(-4, -3, 5)},
cv=5) # 5-fold cross-validation ... cut from 20 to reduce run time
grid.fit(nx[:, None], ny[:, None])
print(grid.best_params_)
# this is what we will plot on the graph
x_pred = np.linspace(min(x), max(x), 10000)[:, None]
y_pred = grid.predict(xaxis_pred)
In [6]:
kr = KernelRidge(kernel='rbf', gamma=1e-4, alpha=0.001)
kr.fit(nx[:, None], ny[:, None])
# this is what we will plot on the graph
x_pred = np.linspace(min(x), max(x), 10000)[:, None]
y_pred = kr.predict(x_pred)
In [10]:
fig = plt.figure()
ax = fig.add_subplot(111)
ax2=ax.twinx()
ax.plot(x, y, color='r')
ax2.plot(x_pred, y_pred, color='blue')
ax2.set_ylabel("predictions")
ax2.set_ylim([0, 5000])
ax.set_xlabel("Position")
ax.set_ylabel("Coverage")
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.get_xaxis().tick_bottom()
ax.get_yaxis().tick_left()
fig.set_facecolor('white')
plt.show()
In [ ]: