In [126]:
# import modules
%matplotlib inline
import os
import random
import pylab
import pandas as pd
import numpy as np
import cPickle as pkl
from lasagne import layers, updates
from scipy.misc import imread, imresize
from lasagne.nonlinearities import softmax
from nolearn.lasagne import NeuralNet, BatchIterator
from sklearn.feature_extraction.image import extract_patches
data_root = '/home/faizy/workspace/project/project/datasets/'
model_root = '/home/faizy/workspace/project/project/models/'
icdar_root = 'icdar15/'
test_root = 'Challenge2_Test_Task3_Images'
test_size = 1095
In [127]:
# load the trained nets: a text/no-text detector and a 62-way character
# recognizer (the lasagne/nolearn packages must be installed for these
# pickles to load)
with open(model_root + 'detector_2.pkl', 'rb') as f:
    detector = pkl.load(f)
with open(model_root + 'recognizer.pkl', 'rb') as f:
    recognizer = pkl.load(f)
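For reference, both pickles are nolearn NeuralNet objects wrapping Lasagne layers, which is why the lasagne/nolearn imports above are needed. A purely illustrative sketch of how such a detector could be defined; the layer sizes and hyperparameters here are assumptions, not the contents of detector_2.pkl:
In [ ]:
# illustrative only: a plausible 32x32 text/no-text detector definition
example_detector = NeuralNet(
    layers=[
        ('input', layers.InputLayer),
        ('conv1', layers.Conv2DLayer),
        ('pool1', layers.MaxPool2DLayer),
        ('hidden', layers.DenseLayer),
        ('output', layers.DenseLayer),
    ],
    input_shape=(None, 1, 32, 32),
    conv1_num_filters=32, conv1_filter_size=(5, 5),
    pool1_pool_size=(2, 2),
    hidden_num_units=256,
    output_num_units=2, output_nonlinearity=softmax,  # text vs no-text
    update=updates.nesterov_momentum,
    update_learning_rate=0.01, update_momentum=0.9,
    batch_iterator_train=BatchIterator(batch_size=128),
    max_epochs=20, verbose=1,
)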
In [128]:
# visualize a random test image in grayscale
# (randrange excludes the upper bound; images are numbered 1..test_size)
i = random.randrange(1, test_size + 1)
img = imread(os.path.join(data_root, icdar_root, test_root, 'word_' + str(i) + '.png'), flatten=True)
pylab.imshow(img)
pylab.gray()
pylab.show()
image_height, image_width = img.shape
In [129]:
print 'height:', image_height, ', width:', image_width
In [130]:
# slide a window of the full image height and 0.6x that width across the
# image (the patch shape must be integer-valued)
patches = extract_patches(img, (image_height, int(image_height * 0.6)))
print patches.shape
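extract_patches returns a stride-1 sliding-window view of shape (n_rows, n_cols, patch_h, patch_w); with a window as tall as the image, every window is a horizontal offset. A quick sanity check on a dummy array:
In [ ]:
# a 3x3 window over a 5x5 array gives a 3x3 grid of window positions
demo_patches = extract_patches(np.arange(25).reshape(5, 5), (3, 3))
print demo_patches.shape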
In [131]:
# resize every window to the 32x32 input size the CNNs expect
new_lst = []
for i in range(patches.shape[0]):
    for j in range(patches.shape[1]):
        new_lst.append(imresize(patches[i, j, :, :], (32, 32)))
print len(new_lst)
In [132]:
# stack into a (n_patches, 1, 32, 32) float32 batch for the detector
new_list = np.stack(new_lst)
tester = new_list.reshape(patches.shape[0] * patches.shape[1], 1, 32, 32).astype('float32')
tester.shape
Out[132]:
In [133]:
# global contrast normalisation over the whole batch: unit std, zero mean
tester /= tester.std(axis=None)
tester -= tester.mean()
tester = tester.astype('float32')
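A quick sanity check (a new cell, not part of the original run) that the batch is now roughly zero-mean with unit standard deviation:
In [ ]:
print 'mean:', tester.mean(), 'std:', tester.std()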
In [134]:
print tester.shape
In [135]:
# class probabilities ('no text' vs 'text') for every window
preder = detector.predict_proba(tester)
In [136]:
# column 1 is the text probability; fold it back onto the window grid
heatmap = preder[:, 1].reshape((patches.shape[0], patches.shape[1]))
print heatmap.shape
In [137]:
# detection heatmap (rows flipped to match the image orientation),
# the source image, and the flattened 1-D response curve
pylab.pcolor(heatmap[::-1])
pylab.axis('off')
pylab.show()
pylab.imshow(img)
pylab.show()
a = np.reshape(heatmap, patches.shape[1] * patches.shape[0])
pylab.plot(a)
Out[137]:
In [138]:
from scipy.ndimage.filters import maximum_filter
# a position is kept iff it equals the local maximum: peaks in the
# response mark character candidates, peaks in (1 - a) mark gaps
# (the filter size must be an integer)
peakind = np.nonzero(maximum_filter(a, size=int((patches.shape[1] / 5) * 0.75)) == a)[0]
breakind = np.nonzero(maximum_filter(1 - a, size=patches.shape[1] / 5) == (1 - a))[0]
print peakind, breakind
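The trick above: a position is a local maximum exactly when it equals the maximum over a window centred on it, so comparing the response against its maximum_filter output marks the peaks. A small 1-D illustration:
In [ ]:
# indices 1 and 4 are the local maxima of this toy response
toy = np.array([0.1, 0.9, 0.2, 0.1, 0.8, 0.3])
print np.nonzero(maximum_filter(toy, size=3) == toy)[0]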
In [139]:
# show the windows selected as character candidates
for i in peakind:
    pylab.imshow(tester[i, 0, :, :])
    pylab.gray()
    pylab.show()
In [140]:
# gather the peak windows into one batch for the recognizer
word = np.zeros((len(peakind), 1, 32, 32))
for idx, item in enumerate(peakind):
    word[idx, ...] = tester[item, 0, :, :]
word = word.astype('float32')
In [141]:
# class indices in 0..61 (digits, then uppercase, then lowercase)
predict = recognizer.predict(word)
In [142]:
# Define word recognition functions (Norvig-style spelling corrector)
import re, collections

def words(text):
    return re.findall('[a-z]+', text.lower())

def train(features):
    model = collections.defaultdict(lambda: 1)
    for f in features:
        model[f] += 1
    return model

NWORDS = train(words(open(data_root + 'big.txt').read()))
alphabet = 'abcdefghijklmnopqrstuvwxyz'

def edits1(word):
    splits = [(word[:i], word[i:]) for i in range(len(word) + 1)]
    deletes = [a + b[1:] for a, b in splits if b]
    transposes = [a + b[1] + b[0] + b[2:] for a, b in splits if len(b) > 1]
    replaces = [a + c + b[1:] for a, b in splits for c in alphabet if b]
    inserts = [a + c + b for a, b in splits for c in alphabet]
    return set(deletes + transposes + replaces + inserts)

def known_edits2(word):
    return set(e2 for e1 in edits1(word) for e2 in edits1(e1) if e2.lower() in NWORDS)

def known(words):
    return set(w for w in words if w.lower() in NWORDS)

def correct(word):
    candidates = known([word]) or known(edits1(word)) or known_edits2(word) or [word]
    return sorted(candidates, key=NWORDS.get, reverse=True)

def classer(arrayer):
    # map class indices to characters: 0-9 digits, 10-35 upper, 36-61 lower
    classer_array = []
    for i in range(len(arrayer)):
        if 0 <= arrayer[i] < 10:
            classer_array.append(arrayer[i])
        elif 10 <= arrayer[i] < 36:
            classer_array.append(alphabet[arrayer[i] - 10].upper())
        elif 36 <= arrayer[i] < 62:
            classer_array.append(alphabet[arrayer[i] - 36])
        else:
            print 'Unexpected class index:', arrayer[i]
    return classer_array
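A quick check of the corrector defined above (assumes big.txt has been read into NWORDS): 'speling' is one edit away from 'spelling', so 'spelling' should rank near the top of the candidates:
In [ ]:
# top-ranked candidate corrections for a known misspelling
print correct('speling')[:3]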
In [143]:
# map class indices to characters and join into the raw letter stream
real_pred = classer(predict)
real_pred = map(str, real_pred)
letter_stream = ''.join(real_pred)
In [144]:
print letter_stream
In [154]:
def str_corr(letter_stream):
    # normalise case to the majority vote, keeping a leading capital;
    # strings are immutable, so return the result and reassign it at
    # the call site
    cnt_lwr = 0
    cnt_upr = 0
    for i in letter_stream:
        if i.islower():
            cnt_lwr += 1
        else:
            cnt_upr += 1
    if cnt_lwr > cnt_upr:
        if letter_stream[0].isupper():
            letter_stream = letter_stream.title()
        else:
            letter_stream = letter_stream.lower()
    else:
        if letter_stream[0].isupper():
            letter_stream = letter_stream.title()
        else:
            letter_stream = letter_stream.upper()
    return letter_stream
In [155]:
letter_stream = str_corr(letter_stream)
In [156]:
print letter_stream
In [157]:
print 'Probable words are:', ', '.join(correct(letter_stream))
In [67]:
# scratch: group character peaks (a1) into word segments ending at the
# break positions (a2)
a1 = [0, 16, 33, 46]
a2 = [22, 54]
a2.append(image_width)
for idx, i in enumerate(a2):
    a3 = []
    for idxj, j in enumerate(a1):
        if 0 <= j < i:
            a3.append(j)
            a1[idxj] = -1
        elif j != -1 and len(a3) != 0:
            print a3[0]
            break
In [242]:
# scratch: index ranges between consecutive break positions
a1 = [0, 16, 33, 46]
a2 = [22, 54]
a2.insert(0, 0)
for i in range(len(a2) - 1):
    print range(a2[i], a2[i + 1])
In [250]:
# scratch: boolean mask of which peaks fall before each break
a1 = np.array([0, 16, 33, 46])
a2 = np.array([22, 54])
for i in a2:
    print a1 < i
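The scratch cells above experiment with grouping character peaks (a1) into words at the break positions (a2). A compact sketch of that grouping with np.searchsorted, using the same hypothetical peak and break positions:
In [ ]:
# word id for every character peak: peaks before the first break are
# word 0, peaks between breaks 0 and 1 are word 1, and so on
a1 = np.array([0, 16, 33, 46])
a2 = np.array([22, 54])
groups = np.searchsorted(a2, a1)
for w in np.unique(groups):
    print 'word', w, ':', a1[groups == w]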
In [23]:
correct(letter_stream)[0]   # the top-ranked correction
Out[23]:
In [57]:
# accumulate the best correction per image (e.g. for a results CSV)
an = []
In [62]:
an.append(correct(letter_stream)[0])
In [63]:
an
Out[63]:
In [123]:
# record which characters the recognizer predicted as uppercase
upper_pred = []
for i in real_pred:
    upper_pred.append(i.isupper())
In [124]:
upper_pred
Out[124]:
In [118]:
# lowercase copy; the recorded per-character case is reapplied below
to_conv = letter_stream.lower()
In [119]:
# reapply the recognizer's per-character casing
conver = []
for idx, i in enumerate(to_conv):
    if upper_pred[idx]:
        conver.append(i.upper())
    else:
        conver.append(i)
In [126]:
conver
Out[126]:
In [ ]: