In [74]:
import sys
sys.path.append('/home/yuncong/Brain/pipeline_scripts')
import matplotlib
%matplotlib inline
#from IPython.display import display
#matplotlib.use('GTkAgg')
import matplotlib.pyplot as plt
import pickle
import os
from os import path
from sklearn.linear_model import SGDRegressor
from sklearn.linear_model import Perceptron
from sklearn.cluster import KMeans
from sklearn import svm
from numpy.linalg import lstsq
import numpy as np
import sys
import utilities2014
reload(utilities2014)
from utilities2014 import *
from sklearn.grid_search import GridSearchCV
from sklearn.svm import SVC
from sklearn.preprocessing import MinMaxScaler
In [75]:
# Directory holding the pickled inputs of this notebook.
pcklPath = "/home/iizhaki/oasis/WebStem/pickles/"


def _loadPickle(fileName):
    """Unpickle ``pcklPath + fileName`` and close the file handle afterwards.

    NOTE(review): pickle.load can execute code embedded in the file, so only
    point this at pickles produced by a trusted run.
    """
    with open(pcklPath + fileName, "rb") as pickleFile:
        return pickle.load(pickleFile)


mapped = _loadPickle("mapped.pck")
props = _loadPickle("props.pck")
# hists is indexed below as hists[section][superpixel].
hists = _loadPickle("hists.pck")
refs = _loadPickle("refs.pck")
In [76]:
# Directory whose entries are listed with os.listdir and parsed as triplet/group files.
tripletsPath = "/home/iizhaki/WebStem/triplets/"
# Directory of photo files; listed with os.listdir and searched for "ref_" names.
imgsPath = "/home/iizhaki/WebStem/photos/"
In [77]:
# Publish the GORDON_* directories to the process environment in one call.
# NOTE(review): presumably consumed by utilities2014.DataManager — confirm.
os.environ.update({
    'GORDON_DATA_DIR': '/home/yuncong/project/DavidData2014tif/',
    'GORDON_REPO_DIR': '/home/yuncong/Brain',
    'GORDON_RESULT_DIR': '/home/yuncong/project/DavidData2014results/',
    'GORDON_LABELING_DIR': '/home/yuncong/project/DavidData2014labelings/',
})
In [78]:
def collectPageData(pPath, pData):
    """Accumulate the superpixels selected in one page file into ``pData``.

    Every line is whitespace-split; lines with fewer than six tokens are
    skipped. Token 4 is an underscore-separated name, token 5 a colour tag.

    * A "gold" line sets a reference: fields 3 and 4 of the name give the
      reference number (``boundNum``) and the superpixel, and token 6 says
      whether it becomes the red or the blue reference. The superpixel is
      added to ``pData[boundNum]``.
    * A "red"/"blue" line carries its number and superpixel in fields 2 and 3
      and is recorded only when the number equals the current reference of
      that colour.

    Parameters:
        pPath: path of the page file to read.
        pData: dict mapping reference number -> set of superpixels; updated
            in place.

    Returns:
        "Ok" when the whole file was read, "ERROR!" (also printed) as soon as
        a gold line has no red/blue reference colour. A gold line whose
        seventh token is missing is reported the same way instead of raising
        IndexError.
    """
    redRef = ""
    blueRef = ""
    with open(pPath) as pFile:
        for pLine in pFile:
            pSplit = pLine.split()
            if len(pSplit) < 6:  # Maybe comment or blank space
                continue
            pColor = pSplit[5]
            nameParts = pSplit[4].split("_")
            if "gold" in pColor:
                boundNum = int(nameParts[3])
                superpixel = int(nameParts[4])
                # The length guard above only guarantees tokens 0..5.
                refColor = pSplit[6] if len(pSplit) > 6 else ""
                if "red" in refColor:
                    redRef = boundNum
                elif "blue" in refColor:
                    blueRef = boundNum
                else:
                    print("ERROR!")
                    return "ERROR!"
                pData.setdefault(boundNum, set()).add(superpixel)
                continue
            boundNum = int(nameParts[2])
            superpixel = int(nameParts[3])
            if "red" in pColor and boundNum == redRef:
                pData[redRef].add(superpixel)
            if "blue" in pColor and boundNum == blueRef:
                pData[blueRef].add(superpixel)
    return "Ok"
In [79]:
# Path of every entry in the triplets directory, in os.listdir order.
pagesPath = []
for pageName in os.listdir(tripletsPath):
    pagesPath.append(tripletsPath + pageName)
In [80]:
# Merge the selections of every page file into one dict and echo each
# file's status ("Ok" / "ERROR!") followed by the merged result.
pData = {}
for pageFile in pagesPath:
    pageStatus = collectPageData(pageFile, pData)
    print(pageStatus)
print(pData)
In [ ]:
In [81]:
# 30 sections in total
# For every key of pData, gather the histogram of each selected superpixel.
collectHists = {}
for section, superpixels in pData.items():
    collectHists[section] = [hists[section][sp] for sp in superpixels]
In [ ]:
# [(11, 7), (13, 5), (14, 12)], [(12, 12)]
# [(11, 22), (12, 11)], [(13, 25), (14, 17)]
# Some manual work: histograms of sections 11/13/14 are labelled +1 and
# those of section 12 are labelled -1.
predXs = []
predYs = []
for labelledSections, label in (([11, 13, 14], 1), ([12], -1)):
    for section in labelledSections:
        for currHist in collectHists[section]:
            predXs.append(currHist)
            predYs.append(label)
#predXs = np.array(predXs) * 1.0 / np.max(predXs)
In [ ]:
# Peek at the first two and the last two training rows with their labels.
for rowsAndLabels in ((predXs[0:2], predYs[0:2]), (predXs[-2:], predYs[-2:])):
    print(" ".join(map(str, rowsAndLabels)))
In [ ]:
import shutil  # shutil.copy2 is used by the photo copy loop

# Snapshot of the photo directory; echo the first entry as a sanity check.
allFiles = os.listdir(imgsPath)
print(allFiles[0])
In [ ]:
# Copy the photos of selected superpixels into /home/iizhaki/WebStem/temp/<section>.
# NOTE(review): the notebook export lost its indentation, so it is unclear
# whether the `break` below only stops the section loop after its first key
# or makes the remaining statements unreachable — confirm against the
# original notebook before running.
for section in pData:
spxs = pData[section]
break ############
for sp in spxs:
# Substring a photo filename must contain for this (section, superpixel).
exp = "ref_" + str(section) + "_" + str(sp)
for fn in allFiles:
if exp in fn:
thedir = "/home/iizhaki/WebStem/temp/" + str(section)
# os.stat raises when the directory is missing; the handler then creates it.
# NOTE(review): the bare except also swallows unrelated errors (e.g. permissions).
try:
os.stat(thedir)
except:
os.mkdir(thedir)
shutil.copy2(imgsPath + "/" + fn, "/home/iizhaki/WebStem/temp/" + str(section))
In [ ]:
In [ ]:
# NumPy copies of the training rows and labels; the trailing expression
# displays the shape of the feature matrix.
predXs_arr, predYs_arr = np.array(predXs), np.array(predYs)
predXs_arr.shape
In [ ]:
# Scatter the first two feature columns: label +1 in red, label -1 in green.
for label, colour in ((1, 'r'), (-1, 'g')):
    rowMask = predYs_arr == label
    plt.scatter(predXs_arr[rowMask, 0], predXs_arr[rowMask, 1], c=colour)
plt.show()
#plt.savefig("/home/iizhaki/WebStem/temp/img_[(11,22),(12,11)]vs.[(13,25),(14,17)].png")
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [112]:
# Training SVM
# Labels stay a plain list; features become a float array.
y = predYs
X = np.array(predXs).astype(float)
#scaler = MinMaxScaler().fit(X)
#X = scaler.transform(X)
In [113]:
# Earlier experiments (lstsq, SGDRegressor, default SVC, GridSearchCV over a
# degree-14 polynomial kernel, degree-4 SVC, Perceptron) were tried here and
# are disabled; only the degree-5 polynomial SVC below is fitted.
# Grid kept for the disabled GridSearchCV run; not used by the fit below.
param_grid = {
    'C': [1, 1e1, 1e2, 1e3, 5e3, 1e4, 5e4, 1e5],
    'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1, 1, 10],
}
# fit() returns the estimator itself, so construction and fitting can be chained.
clf = SVC(verbose = True, kernel='poly', class_weight='auto', degree=5).fit(X, y)
print(predYs)
In [114]:
def predict(data, theta):
    """Return the dot product of ``data`` and ``theta`` after converting both to arrays."""
    dataArr = np.array(data)
    thetaArr = np.array(theta)
    return dataArr.dot(thetaArr)
In [115]:
# Per-class hit rate of clf on the rows it was fitted on:
# first the +1 class, then the remaining rows.
predictions = clf.predict(X)
print(predictions)
trueYes = totalYes = 0
trueNo = totalNo = 0
for trueLabel, thePred in zip(predYs, predictions):
    #thePred = predict(predXs[i], thetax)
    if trueLabel > 0:
        totalYes += 1
        trueYes += 1 if thePred > 0. else 0
    else:
        totalNo += 1
        trueNo += 1 if thePred < 0 else 0
print("====================")
print(trueYes * 1.0 / totalYes)
print(trueNo * 1.0 / totalNo)
In [ ]:
In [116]:
#print clf.coef_
#print clf.intercept_
In [117]:
# Labelled (section, boundary) pairs used across experiments:
# [(25, 22), (27, 28)] vs. [(26, 3)]
# [(25, 22), (27, 28)] vs. [(26, 5)]
# [(11, 7), (13, 5), (14, 12)] vs. [(12, 12)]
# [(11, 22), (12, 11)], [(13, 25), (14, 17)]
#
# For every boundary of each listed section, run clf on the histograms of the
# boundary's supporter superpixels and report
#   * the percentage of supporters whose decision value is >= 1, and
#   * the mean decision value (printed only when positive).
viss = {}
# Section-26 supporters with decision value >= 1. Created once, before the
# section loop, so a section processed after 26 cannot wipe the collection.
pos26 = []
#for cSection in [25, 26, 27]:
for cSection in [13, 14]:
    scores26 = []
    DM = DataManager(generate_hierarchy = False, stack = 'RS141', resol = 'x5', section = cSection)
    cSupporters = DM.load_pipeline_result('nonoverlappingSupporters', 'pkl')
    print(pData[cSection])
    #viss[cSection] = DM.visualize_cluster(pData[cSection])
    tuples = []
    for cBoundary in range(len(cSupporters)):
        total = 0
        truePos = 0
        scores = []
        pot = []
        for s in cSupporters[cBoundary]:
            total += 1
            hs = np.array(hists[cSection][s]).astype(float)
            #hs = scaler.transform(hs)
            # Evaluate the classifier once and reuse the value.
            score = clf.decision_function(hs)[0]
            scores.append(score)
            if score >= 1:
                truePos += 1
                pot.append(s)
        if total == 0:
            # No supporters: there is no percentage or mean to report, and
            # dividing by total would raise ZeroDivisionError.
            continue
        value = truePos * 100.0 / total
        #if value < 70:
        #    continue
        tuples.append((value, cBoundary))
        scores = np.array(scores)
        scores26.append((np.mean(scores), cBoundary))
        if cSection == 26:
            pos26 += pot
    print(" ".join(map(str, ("Section ", cSection, ": ", sorted(tuples, reverse=True)))))
    print("============================================")
    scores26 = sorted(scores26, reverse=True)
    for meanScore, boundary in scores26:
        if meanScore > 0:
            print(" ".join(map(str, ("Boudnary: ", boundary, ", Score: ", meanScore))))
In [73]:
# Render the superpixels collected in pos26 on section 26.
# NOTE(review): `display` is not imported in the visible cells (its import is
# commented out in the first cell) — confirm the IPython session provides it.
# cSupporters is reloaded here but not used by this cell.
DM = DataManager(generate_hierarchy = False, stack = 'RS141', resol = 'x5', section = 26)
cSupporters = DM.load_pipeline_result('nonoverlappingSupporters', 'pkl')
display(DM.visualize_cluster(pos26))
Out[73]:
In [25]:
# Load the stored 'DBoundaries26With27' result ('npy') for section 26;
# it is indexed below as a 2-D array (matchings[:, 28]).
DM = DataManager(generate_hierarchy = False, stack = 'RS141', resol = 'x5', section = 26)
matchings = DM.load_pipeline_result('DBoundaries26With27', 'npy')
In [26]:
# Column 28 of matchings in ascending order, then the row indices that give that order.
col28Order = np.argsort(matchings[:, 28])
print(matchings[col28Order, 28])
print(col28Order)
In [ ]:
In [27]:
# Same inspection for the 'DBoundaries2With4' result of section 2, column 8.
DM = DataManager(generate_hierarchy = False, stack = 'RS141', resol = 'x5', section = 2)
matchings = DM.load_pipeline_result('DBoundaries2With4', 'npy')
col8Order = np.argsort(matchings[:, 8])
print(matchings[col8Order, 8])
print(col8Order)
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
# Every entry of the triplets directory as a full path.
groups = [tripletsPath + groupName for groupName in os.listdir(tripletsPath)]
# 40 classifier slots; 0 marks a slot that has not been filled yet.
predXs, predYs, predResults = [0] * 40, [0] * 40, [0] * 40
print(groups)
In [195]:
def _addGroupSamples(slot, firstLabel, samples):
    """Append ``samples`` to classifier slot ``slot`` of predXs/predYs.

    ``samples`` is a list of (histogram, isFirst) pairs; histograms flagged
    isFirst get ``firstLabel`` and all others get ``-firstLabel``.
    """
    if predXs[slot] == 0:  # slot still holds its placeholder
        predXs[slot] = []
        predYs[slot] = []
    for hist, isFirst in samples:
        predXs[slot].append(hist)
        predYs[slot].append(firstLabel if isFirst else -firstLabel)


for group in groups:
    # Blue is lower than Red
    groupN = ""
    groupBR = [set(), set()]
    with open(group) as groupFile:
        for line in groupFile:
            splitted = line.split()
            if len(splitted) < 6:
                continue
            if groupN == "":
                # First data line: locate the group's photo directory and its
                # two "img_ref_<section>_<superpixel>" reference images.
                groupN = imgsPath + splitted[4].split("/")[2]
                print(" ".join(map(str, ("N: ", groupN))))
                # A dedicated comprehension variable keeps the open file
                # handle name from being rebound (Python 2 comprehensions leak).
                groupO = [fn for fn in os.listdir(groupN) if "img_ref_" in fn]
                print(" ".join(map(str, ("O: ", groupO))))
                groupY = [fn.split("_")[2] for fn in groupO]
                print(" ".join(map(str, ("Y: ", groupY))))
                lower = int(groupO[0].split("_")[3])
                upper = int(groupO[1].split("_")[3])
                # Slot 0 is the section-26 classifier, slot 1 the other one.
                br = 0 if groupY[0] == "26" else 1
                print(" ".join(map(str, ("Br: ", br))))
            name = splitted[4]
            tag = splitted[5]
            isFirstRef = ("ref_" + groupY[0]) in name
            isSecondRef = ("ref_" + groupY[1]) in name
            if tag == "blue":
                if isFirstRef:
                    groupBR[br].add(int(name.split("_")[3]))
            elif tag == "red":
                if isSecondRef:
                    groupBR[1-br].add(int(name.split("_")[3]))
            elif tag == "uncheck":
                # The length guard only guarantees tokens 0..5.
                undone = splitted[6] if len(splitted) > 6 else ""
                # discard: an uncheck without a recorded tick is a no-op
                # rather than a KeyError.
                if undone == "blue" and isFirstRef:
                    groupBR[br].discard(int(name.split("_")[3]))
                elif undone == "red" and isSecondRef:
                    groupBR[1-br].discard(int(name.split("_")[3]))
    if groupN == "":
        # No data line in this file: without this guard the previous group's
        # groupY/lower/upper/br would be appended again (or a NameError
        # raised on the very first file).
        continue
    # =============================
    y0 = int(groupY[0])
    y1 = int(groupY[1])
    # Reference superpixels first, then the ticked blue and red ones.
    samples = [(hists[y0][lower], True), (hists[y1][upper], False)]
    samples += [(hists[y0][b1], True) for b1 in groupBR[br]]
    samples += [(hists[y1][r1], False) for r1 in groupBR[1-br]]
    # =============================
    # Slot br labels the first section +1; slot 1-br gets the mirrored labels.
    _addGroupSamples(br, 1.0, samples)
    _addGroupSamples(1-br, -1.0, samples)
In [ ]:
In [ ]:
In [203]:
# Fit one default SVC per filled slot; slots still holding the 0 placeholder are skipped.
for i, (X, y) in enumerate(zip(predXs, predYs)):
    if X == 0:
        continue
    clf = svm.SVC()
    clf.fit(X, y)
    predResults[i] = clf
In [272]:
# Show the support-vector indices of every trained slot.
for i in range(len(predYs)):
    fitted = predResults[i]
    if fitted != 0:
        print(" ".join(map(str, (i, fitted.support_))))
In [273]:
# 0 => Corresponds to 26
# 1 => Corresponds to others
In [274]:
import sys
sys.path.append('/home/yuncong/Brain/pipeline_scripts')
from utilities2014 import *
In [275]:
# Publish the GORDON_* directories to the process environment in one call.
# NOTE(review): presumably consumed by utilities2014.DataManager — confirm.
os.environ.update({
    'GORDON_DATA_DIR': '/home/yuncong/project/DavidData2014tif/',
    'GORDON_REPO_DIR': '/home/yuncong/Brain',
    'GORDON_RESULT_DIR': '/home/yuncong/project/DavidData2014results/',
    'GORDON_LABELING_DIR': '/home/yuncong/project/DavidData2014labelings/',
})
In [288]:
# Boundaries (25, 22) and (27, 28): count supporters that slot 0 (the '26'
# classifier) puts on the negative side and that slot 1 puts on the positive side.
for sec1, i in [(25, 22), (27, 28)]:
    dm_prev = DataManager(generate_hierarchy=False, stack='RS141', resol='x5', section=sec1)
    supporters_prev = dm_prev.load_pipeline_result('nonoverlappingSupporters', 'pkl')
    total = right = wrong = 0
    for s in supporters_prev[i]:
        hist = hists[sec1][s]
        total += 1
        right += int(predResults[0].predict(hist)[0] < 0)
        wrong += int(predResults[1].predict(hist)[0] > 0)
    print(" ".join(map(str, (sec1, ": Using classifier '26' results", right, "out of", total))))
    print(" ".join(map(str, (sec1, ": Using classifier 'other' results", wrong, "out of", total))))

# Boundary (26, 3): the same counts with the two classifier slots swapped.
for sec2, j in [(26, 3)]:
    dm_next = DataManager(generate_hierarchy=False, stack='RS141', resol='x5', section=sec2)
    supporters_next = dm_next.load_pipeline_result('nonoverlappingSupporters', 'pkl')
    total = right = wrong = 0
    for s in supporters_next[j]:
        hist = hists[sec2][s]
        total += 1
        right += int(predResults[1].predict(hist)[0] < 0)
        wrong += int(predResults[0].predict(hist)[0] > 0)
    print(" ".join(map(str, (sec2, ": Using classifier 'other' results", right, "out of", total))))
    print(" ".join(map(str, (sec2, ": Using classifier '26' results", wrong, "out of", total))))
In [289]:
# For every boundary of section 25, print the percentage of its supporters
# that slot 1 (the 'other' classifier) puts on the negative side.
sec = 25
dm_next = DataManager(generate_hierarchy=False, stack='RS141', resol='x5', section=sec)
supporters_next = dm_next.load_pipeline_result('nonoverlappingSupporters', 'pkl')
for j in range(len(supporters_next)):
    total = right = wrong = 0
    for s in supporters_next[j]:
        hist = hists[sec][s]
        total += 1
        right += int(predResults[1].predict(hist)[0] < 0)
        # Counted as in the original, although only `right` is reported.
        wrong += int(predResults[0].predict(hist)[0] > 0)
    print(" ".join(map(str, ("(", sec, ", ", j, "): Using classifier 'other' results: ", (right * 100.0 / total)))))
    #print "(26, ", j, "): Using classifier '26' results", wrong, "out of", total
#print "(26, ", j, "): Using classifier '26' results", wrong, "out of", total
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [122]:
# First two rows of X with their labels.
print(" ".join(map(str, (X[0], X[1], y[0], y[1]))))
In [123]:
# Fit three models on the same X/y: a perceptron, a default SVC and a
# least-squares weight vector. fit() returns the estimator, so it is chained.
prc = Perceptron().fit(X, y)
clf = svm.SVC().fit(X, y)
thetax, _, _, _ = np.linalg.lstsq(X, y)
In [142]:
# Three accuracy percentages for clf: on groupBR[0], on groupBR[1], and on
# the "ref_" files of groupN that do not contain "img".
# NOTE(review): groupN, groupBR, g0, g1, clf and thetax are all left over from
# other cells; this cell cannot run on a fresh kernel.
# NOTE(review): indentation was lost in the export; the nesting of the
# counters below must be confirmed against the original notebook.
# NOTE(review): each percentage divides by `total` and raises
# ZeroDivisionError if its loop saw no items.
allImgs = [f for f in os.listdir(groupN) if "ref_" in f and "img" not in f]
total = 0
right = 0
# NOTE(review): redefines `predict` with a different body than the earlier
# cell (a list of per-row dot products); it is not called in this cell.
def predict(data, theta):
theta = np.array(theta)
prediction = [np.dot(theta, d) for d in data]
return prediction
#ptrain = predict(X_train, thetax)
# Share of groupBR[0] items predicted as 1.
for b1 in groupBR[0]:
if int(clf.predict(hists[g0][b1])) == 1:
right += 1
total += 1
print right * 100.0 / total
# ==============================
total = 0
right = 0
# Share of groupBR[1] items predicted as 0.
# NOTE(review): the visible training cells use labels 1.0 / -1.0, so a
# prediction of 0 may never occur — confirm which labels clf was fitted with.
for r1 in groupBR[1]:
if int(clf.predict(hists[g1][r1])) == 0:
right += 1
total += 1
print right * 100.0 / total
# ==============================
total = 0
right = 0
for img in allImgs:
# Fields 1 and 2 of the file name are read as the two indices into hists.
cords = img.split("_")[1: 3]
if cords[0] == "ref":
continue
a = int(cords[0])
b = int(cords[1])
# print cords[0], prc.predict(hists[a][b])[0]
#if int(cords[0]) == int(2 - prc.predict(hists[a][b])[0]):
# `res` is computed but only referenced by the commented-out check below.
res = np.dot(thetax, np.array(hists[a][b]))
#if int(cords[0]) == int(round(2 - res)):
if int(cords[0]) == int(round(2 - clf.predict(hists[a][b])[0])):
right += 1
total += 1
print right * 100.0 / total
In [ ]:
In [ ]:
In [110]:
# Parse every group file and append its samples to two classifier slots of
# predXs/predYs with mirrored labels (slot g0: +1/-1, slot g1: -1/+1).
# NOTE(review): indentation was lost in the export; the nesting of the
# statements below must be confirmed against the original notebook.
for group in groups:
# Blue is lower than Red
groupN = ""
groupBR = [set(), set()]
lower = ""
upper = ""
with open(group) as f:
for line in f:
splitted = line.split()
#print splitted
#break
# Lines with fewer than six whitespace-separated tokens are skipped.
if len(splitted) < 6:
continue
# First data line: derive the photo directory from token 4 and list it.
if groupN == "":
groupN = imgsPath + splitted[4].split("/")[2]
# Field 1 of every "org_" file name in the directory.
# NOTE(review): under Python 2 the comprehension variable `f` rebinds the
# name of the open file handle.
groupY = ([f.split("_")[1] for f in os.listdir(groupN) if "org_" in f])
#print sorted([f.split("_")[1] for f in os.listdir(groupN) if "org_" in f])
#print [f.split("_")[1] for f in os.listdir(groupN) if "org_" in f]
#groupY = ["0", "1"]
groupO = ([f for f in os.listdir(groupN) if "img_ref_" in f])
#groupO = [f for f in os.listdir(groupN) if "img_ref_" in f]
print groupY, groupO
# `lower` is field 3 of the reference image whose name contains
# "img_ref_" + groupY[0]; `upper` is field 3 of the other one.
if ("img_ref_" + groupY[0]) in groupO[0]:
lower = int(groupO[0].split("_")[3])
upper = int(groupO[1].split("_")[3])
else:
lower = int(groupO[1].split("_")[3])
upper = int(groupO[0].split("_")[3])
print "Group:", groupN, groupY
# "blue"/"red" add field 3 of the name to groupBR[0]/groupBR[1] when the name
# refers to groupY[0]/groupY[1]; "uncheck" removes it again.
# NOTE(review): splitted[6] is read although only six tokens are guaranteed,
# and set.remove raises KeyError if the value was never added.
if splitted[5] == "blue":
if ("ref_" + groupY[0]) in splitted[4]:
groupBR[0].add(int(splitted[4].split("_")[3]))
elif splitted[5] == "red":
if ("ref_" + groupY[1]) in splitted[4]:
groupBR[1].add(int(splitted[4].split("_")[3]))
elif splitted[5] == "uncheck":
if splitted[6] == "blue" and ("ref_" + groupY[0]) in splitted[4]:
groupBR[0].remove(int(splitted[4].split("_")[3]))
elif splitted[6] == "red" and ("ref_" + groupY[1]) in splitted[4]:
groupBR[1].remove(int(splitted[4].split("_")[3]))
print groupBR[0]
print groupBR[1]
print lower, upper
# i, X and y are reset here but not used again in this cell.
i = 0
X = []
y = []
# Choose the classifier slots (g0/g1) and the hists keys (k0/k1).
# NOTE(review): `"26" in groupO[0]` is a substring test on the whole file
# name, so it also matches a superpixel index containing 26 — confirm.
# NOTE(review): in the else branch k0 becomes groupY[1] while `lower` and
# groupBR[0] were matched against groupY[0] — confirm this pairing is intended.
if "26" in groupO[0]:
g0 = 0
g1 = 1
k0 = int(groupY[0])
k1 = int(groupY[1])
else:
g0 = 1
g1 = 0
k0 = int(groupY[1])
k1 = int(groupY[0])
#g0 = int(groupY[0])
#g1 = int(groupY[1])
lowHist = hists[k0][lower]
highHist = hists[k1][upper]
# lenHist is not used again in this cell.
lenHist = len(lowHist)
# =============================
# Slot g0: lowHist and the groupBR[0] histograms get +1, highHist and the
# groupBR[1] histograms get -1. A slot still holding 0 is turned into lists first.
if predXs[g0] == 0:
predXs[g0] = []
predYs[g0] = []
predXs[g0].append(lowHist);
predYs[g0].append(1.0);
predXs[g0].append(highHist);
predYs[g0].append(-1.0);
for b1 in groupBR[0]:
predXs[g0].append(hists[k0][b1])
predYs[g0].append(1.0)
for r1 in groupBR[1]:
predXs[g0].append(hists[k1][r1])
predYs[g0].append(-1.0)
# =============================
# Slot g1: the same histograms with the labels flipped.
if predXs[g1] == 0:
predXs[g1] = []
predYs[g1] = []
predXs[g1].append(lowHist);
predYs[g1].append(-1.0);
predXs[g1].append(highHist);
predYs[g1].append(1.0);
for b1 in groupBR[0]:
predXs[g1].append(hists[k0][b1])
predYs[g1].append(-1.0)
for r1 in groupBR[1]:
predXs[g1].append(hists[k1][r1])
predYs[g1].append(1.0)
In [ ]:
In [ ]: