In this notebook, we extract a set of numerical features from images of font characters (the digits 0-9 and the comma).
The features are the eigenvectors for each image array. Thus, if we have n images in our training set, this process produces an n * 16 feature matrix.
In [1]:
%pylab inline
pylab.style.use('ggplot')
import numpy as np
import pandas as pd
import cv2
import os
In [2]:
# Font images are expected in a `font_images` folder under the working directory.
image_dir = os.path.join(os.getcwd(), 'font_images')
# Only list the folder once we know it exists.
has_images = os.path.isdir(image_dir) and len(os.listdir(image_dir)) > 0
if not has_images:
    print('no images found in {}'.format(image_dir))
First, we outline the processing for a single image.
In [3]:
arial_path = os.path.join(image_dir, 'arial.png')
img_mat = cv2.imread(arial_path)
# cv2.imread reports a missing/unreadable file by returning None instead of
# raising, so fail here with a message that names the file.
if img_mat is None:
    raise IOError('could not read image {}'.format(arial_path))
# Convert to grayscale
gs = cv2.cvtColor(img_mat, cv2.COLOR_BGR2GRAY)
In [4]:
# Dimensions of the grayscale image array.
gs.shape
Out[4]:
In [5]:
pylab.imshow(gs, cmap='gray')
# Hide every tick and tick label on both axes. tick_params expects booleans;
# the legacy 'off' strings are non-empty (truthy) and are not reliably
# honoured by newer matplotlib releases.
pylab.tick_params(
    axis='both',   # changes apply to the x-axis and y-axis
    which='both',  # both major and minor ticks are affected
    bottom=False, top=False, left=False, right=False,  # don't display ticks
    labelbottom=False, labeltop=False, labelleft=False, labelright=False  # don't display ticklabels
)
In [6]:
# Partition the columns into 11 roughly equal parts, one per character
# (digits 0-9 plus the comma): 12 evenly spaced edges minus the two outer
# ones leave the 10 interior cut positions.
# `np.int` was only an alias of the builtin `int` and has been removed from
# NumPy, so the builtin is used directly.
split_positions = np.linspace(0, gs.shape[1], num=12).astype(int)
split_positions = split_positions[1:-1]
# manual tweak by inspection
split_positions[0] += 10
split_positions
Out[6]:
In [7]:
# Cut the grayscale image column-wise at the split positions and show each
# piece in its own subplot.
parts = np.array_split(gs, split_positions, axis=1)
fig, axes = pylab.subplots(1, len(parts))
for part, ax in zip(parts, axes):
    ax.imshow(part, cmap='gray')
    # tick_params expects booleans; the legacy 'off' strings are truthy and
    # are not reliably honoured by newer matplotlib releases.
    ax.tick_params(
        axis='both',   # changes apply to the x-axis and y-axis
        which='both',  # both major and minor ticks are affected
        bottom=False, top=False, left=False, right=False,  # don't display ticks
        labelbottom=False, labeltop=False, labelleft=False, labelright=False  # don't display ticklabels
    )
In [8]:
# Resize every character piece to 32x32, binarize it at threshold 127 and
# collect the results in `binarized`, plotting each one as we go.
fig, axes = pylab.subplots(1, len(parts))
binarized = []
for ax, p in zip(axes, parts):
    resized = cv2.resize(p, (32, 32))
    _, bin_img = cv2.threshold(resized, 127, 255, cv2.THRESH_BINARY)
    binarized.append(bin_img)
    ax.imshow(bin_img, cmap='gray')
    # tick_params expects booleans; the legacy 'off' strings are truthy and
    # are not reliably honoured by newer matplotlib releases.
    ax.tick_params(
        axis='both',   # changes apply to the x-axis and y-axis
        which='both',  # both major and minor ticks are affected
        bottom=False, top=False, left=False, right=False,  # don't display ticks
        labelbottom=False, labeltop=False, labelleft=False, labelright=False  # don't display ticklabels
    )
In [9]:
def calc_on_pixel_fraction(part_img):
    """
    Return the fraction of "on" pixels in a binarized character image.

    Note that on pixel == 0, off pixel == 255. The result is
    ``on_count / total_pixel_count``, a float in [0, 1].

    The zeros are counted directly rather than by indexing the output of
    `np.unique`: that approach returned the on/off *ratio* instead of a
    fraction, and failed (or picked the wrong count) for images that are
    entirely on or entirely off. An empty image yields 0.0.
    """
    pixels = np.asarray(part_img)
    if pixels.size == 0:
        return 0.0
    on_count = (pixels == 0).sum()
    return on_count / float(pixels.size)
# One value per character image, labelled with the glyph it shows.
on_pixel_fractions = pd.Series(
    [calc_on_pixel_fraction(char_img) for char_img in binarized],
    index=list('0123456789,'),
)
on_pixel_fractions.plot(title='On pixel fractions for all chars', kind='bar')
Out[9]:
In [10]:
# Again, note that on pixel == 0, off pixel == 255
def calc_f_on_pixel_pos(part_img, f, axis=0):
    """
    Apply `f` to the positions of the on pixels along one axis.

    `axis=0` passes the first index array returned by `np.where` (row
    indices for a 2-D image) to `f`; `axis=1` passes the second one
    (column indices). Returns whatever `f` returns.
    """
    assert axis in (0, 1)
    row_idx, col_idx = np.where(part_img == 0)
    if axis == 0:
        return f(row_idx)
    return f(col_idx)
# Mean on-pixel position along axis 0 and axis 1, one row per character.
m_x = [calc_f_on_pixel_pos(char_img, f=np.mean, axis=0) for char_img in binarized]
m_y = [calc_f_on_pixel_pos(char_img, f=np.mean, axis=1) for char_img in binarized]
mean_values = np.column_stack([m_x, m_y])
mean_on_pixel_xy = pd.DataFrame(
    mean_values,
    columns=['mean_x', 'mean_y'],
    index=list('0123456789,'),
)
mean_on_pixel_xy.plot(subplots=True, kind='bar')
Out[10]:
In [11]:
# Variance of the on-pixel position along axis 0 and axis 1, one row per character.
v_x = [calc_f_on_pixel_pos(char_img, f=np.var, axis=0) for char_img in binarized]
v_y = [calc_f_on_pixel_pos(char_img, f=np.var, axis=1) for char_img in binarized]
var_values = np.column_stack([v_x, v_y])
var_on_pixel_xy = pd.DataFrame(
    var_values,
    columns=['var_x', 'var_y'],
    index=list('0123456789,'),
)
var_on_pixel_xy.plot(subplots=True, kind='bar')
Out[11]:
In [12]:
def calc_on_pixel_x_y_corr(part_img):
    """
    Return the correlation coefficient between the index arrays of the on
    pixels (pixels equal to 0).

    The index arrays from `np.where` are fed to `np.corrcoef` as rows and the
    off-diagonal entry [1, 0] of the resulting matrix is returned.
    """
    on_coords = np.where(part_img == 0)
    corr_matrix = np.corrcoef(on_coords)
    return corr_matrix[1, 0]
# Row/column index correlation of the on pixels, one bar per character.
x_y_corrs = pd.Series(
    [calc_on_pixel_x_y_corr(char_img) for char_img in binarized],
    index=list('0123456789,'),
)
x_y_corrs.plot(kind='bar')
Out[12]:
Note: I decided to not use this feature after adding the moment based features.
Moment calculation in OpenCV is described here:
http://docs.opencv.org/2.4/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html
In [13]:
def calc_moments(part_img):
    """
    Return the dictionary of image moments computed by `cv2.moments`.

    NOTE(review): with `binaryImage=True` OpenCV treats every non-zero pixel
    as 1, while elsewhere in this notebook the on pixels are the zeros --
    confirm that moments of the non-zero (off) region are what is intended.
    """
    return cv2.moments(part_img, binaryImage=True)
# Row labels, in the same left-to-right order as the character images.
chars = ('zero', 'one', 'two', 'three', 'four',
         'five', 'six', 'seven', 'eight', 'nine', 'comma')

# One moments dictionary per character -> one DataFrame row per character.
m_list = []
for char_img in binarized:
    m_list.append(calc_moments(char_img))
m_df = pd.DataFrame.from_records(m_list)
m_df.index = chars
In [14]:
# Preview the first rows of the moments table (one row per character).
m_df.head()
Out[14]:
In [15]:
# One bar chart per moment, laid out on an 8 x 3 grid (this requires exactly
# 24 moment columns, otherwise the reshape fails).
figure, axes = pylab.subplots(8, 3, figsize=(20, 24))
moment_cols = m_df.columns.values.reshape(8, 3)
for moment_name, moment_ax in zip(moment_cols.ravel(), axes.ravel()):
    m_df[moment_name].plot(kind='bar', title=moment_name, ax=moment_ax)
pylab.tight_layout()
So, among all the moments, we choose the normalized moments: nu03, nu11 ('en-eu-one-one'), and nu12. All the other moments have similar shapes across the character classes.
In [26]:
from scipy.fftpack import dct
def calc_dct2d_zigzagged_coeffs(part_img, n_diags=3):
    """
    Return a 1D numpy array with the low-order 2D DCT coefficients.

    The DCT is applied along the last axis, the result is transposed, and the
    DCT is applied along the last axis again. The coefficients on the first
    `n_diags` anti-diagonals (starting at the top-left corner) are then
    concatenated, anti-diagonal by anti-diagonal.
    """
    dct_result = dct(dct(part_img, norm='ortho').T, norm='ortho')

    # To make a feature vector out of the DCT results by taking the elements
    # of dct_result in a zigzagged fashion.
    # We can access these efficiently
    # by taking the mirror image and accessing the diagonals.
    mirrored = np.fliplr(dct_result)

    # After the left-right flip the [0, 0] coefficient sits in the top-right
    # corner, i.e. on diagonal offset (number of columns - 1). Using the
    # column count keeps this correct for non-square inputs as well.
    idx_first = mirrored.shape[1] - 1
    idx_last = idx_first - n_diags
    zigzagged_coeffs = np.concatenate([np.diag(mirrored, k)
                                       for k in range(idx_first, idx_last, -1)])
    return zigzagged_coeffs
# DCT feature vector (first 3 anti-diagonals) for every character image.
diag_var_dct = []
for char_img in binarized:
    diag_var_dct.append(calc_dct2d_zigzagged_coeffs(char_img, n_diags=3))

dct_df = pd.DataFrame.from_records(diag_var_dct, index=chars)
dct_df.plot(subplots=True, figsize=(10, 20), kind='bar')
Out[26]:
In [17]:
def partition_image(img_file, n_chars, size=32, threshold=127, first_split_offset=10):
    """
    Split an image of `n_chars` side-by-side characters into binarized tiles.

    * Read the colour image `img_file` (OpenCV loads it in BGR channel order)
    * Convert to grayscale
    * Split column-wise into one subarray per character
    * Resize to `size * size`
    * Binarize with threshold `threshold`

    `first_split_offset` is added to the first cut position; the default of
    10 is the manual tweak that was found by inspection.

    Return a list of subarrays for each character.

    Raises AssertionError if `img_file` is not an existing file and IOError
    if OpenCV cannot decode it.
    """
    assert os.path.isfile(img_file)

    img_mat = cv2.imread(img_file)
    # cv2.imread returns None (rather than raising) when decoding fails.
    if img_mat is None:
        raise IOError('could not read image {}'.format(img_file))
    gs = cv2.cvtColor(img_mat, cv2.COLOR_BGR2GRAY)

    # n_chars + 1 evenly spaced edges; dropping both outer edges leaves the
    # interior cut positions. `np.int` has been removed from NumPy, so the
    # builtin `int` (which it aliased) is used.
    split_positions = np.linspace(0, gs.shape[1], num=n_chars + 1).astype(int)
    split_positions = split_positions[1:-1]

    # manual tweak by inspection (there is no cut to shift when n_chars == 1)
    if len(split_positions) > 0:
        split_positions[0] += first_split_offset

    parts = np.array_split(gs, split_positions, axis=1)

    resized_images = []
    for p in parts:
        p_new = cv2.resize(p, (size, size))
        _, bin_img = cv2.threshold(p_new, threshold, 255, cv2.THRESH_BINARY)
        resized_images.append(bin_img)

    return resized_images
In [18]:
from functools import partial
def calc_on_pixel_fraction(part_img):
    """
    Return the fraction of "on" pixels (pixels equal to 0) in a binarized
    character image, as a float in [0, 1].

    The zeros are counted directly rather than by indexing the output of
    `np.unique`: that approach returned the on/off *ratio* instead of a
    fraction, and failed (or picked the wrong count) for images that are
    entirely on or entirely off. An empty image yields 0.0.
    """
    pixels = np.asarray(part_img)
    if pixels.size == 0:
        return 0.0
    on_count = (pixels == 0).sum()
    return on_count / float(pixels.size)
def calc_f_on_pixel_pos(part_img, f, axis=0):
    """
    Apply `f` to the on-pixel (== 0) indices along `axis` and return the result.

    `axis=0` selects the first index array returned by `np.where` (row
    indices for a 2-D image), `axis=1` the second (column indices).
    """
    assert axis in (0, 1)
    first_idx, second_idx = np.where(part_img == 0)
    selected = second_idx if axis != 0 else first_idx
    return f(selected)
def calc_on_pixel_x_y_corr(part_img):
    """
    Return the correlation coefficient between the index arrays of the on
    pixels (pixels equal to 0), read from entry [0, 1] of `np.corrcoef`.
    """
    on_coords = np.where(part_img == 0)
    corr_matrix = np.corrcoef(on_coords)
    return corr_matrix[0, 1]
def calc_moments(part_img, moments_to_keep={'nu03', 'nu11', 'nu12'}):
    """
    Return the subset of `cv2.moments` for `part_img` whose names are in
    `moments_to_keep`, as a {name: value} dictionary.

    The default set is only read, never modified.
    """
    all_moments = cv2.moments(part_img, binaryImage=True)
    kept = {}
    for name, value in all_moments.items():
        if name in moments_to_keep:
            kept[name] = value
    return kept
In [27]:
from scipy.fftpack import dct
def calc_dct2d_zigzagged_coeffs(part_img, n_diags=3):
    """
    Return a 1D numpy array with the zigzagged 2D DCT coefficients.

    The DCT is applied along the last axis, the result is transposed, and the
    DCT is applied along the last axis again. The coefficients on the first
    `n_diags` anti-diagonals (starting at the top-left corner) are then
    concatenated, anti-diagonal by anti-diagonal.
    """
    dct_result = dct(dct(part_img, norm='ortho').T, norm='ortho')

    # Anti-diagonals of dct_result are ordinary diagonals of its mirror image.
    mirrored = np.fliplr(dct_result)

    # After the left-right flip the [0, 0] coefficient sits in the top-right
    # corner, i.e. on diagonal offset (number of columns - 1). Using the
    # column count keeps this correct for non-square inputs as well.
    idx_first = mirrored.shape[1] - 1
    idx_last = idx_first - n_diags
    zigzagged_coeffs = np.concatenate([np.diag(mirrored, k)
                                       for k in range(idx_first, idx_last, -1)])
    return zigzagged_coeffs
# Feature name -> function computing that feature from one character image.
# Entries that are commented out are features currently left out.
feature_calc = dict(
    on_pixel_frac=calc_on_pixel_fraction,
    # on_pixel_x_mean=partial(calc_f_on_pixel_pos, f=np.mean, axis=0),
    # on_pixel_y_mean=partial(calc_f_on_pixel_pos, f=np.mean, axis=1),
    on_pixel_x_var=partial(calc_f_on_pixel_pos, f=np.var, axis=0),
    on_pixel_y_var=partial(calc_f_on_pixel_pos, f=np.var, axis=1),
    # on_pixel_x_y_corr=calc_on_pixel_x_y_corr,
)
def extract_features(img_file, chars):
    """
    Extract features for a combined image. Returns a DataFrame with 1 row per character.

    `img_file` is the path of an image holding `len(chars)` characters side by
    side; `chars` are the row labels, in the same order as the characters.

    The returned frame is indexed by `chars` (index name `char_name`) and has
    one column per entry of `feature_calc`, the selected moments, a
    `font_name` column (file name up to its first '.') and the DCT
    coefficients as `dct_<i>` columns.
    """
    char_images = partition_image(img_file, len(chars))
    font_name = os.path.basename(img_file).split('.')[0]

    features = []
    for char_img in char_images:
        feature_vals = {fname: fgen(char_img) for fname, fgen in feature_calc.items()}
        # Calculate the moment feature values separately and update feature_vals.
        moment_features = calc_moments(char_img)
        feature_vals.update(moment_features)
        features.append(feature_vals)

    features = pd.DataFrame.from_records(features, index=chars)
    features.index.name = 'char_name'
    features['font_name'] = font_name

    # Include the DCT features. These must come from *this* image's
    # characters, not from a notebook-level variable left over from the
    # exploratory cells.
    dct_features = [calc_dct2d_zigzagged_coeffs(p) for p in char_images]
    dct_features = pd.DataFrame.from_records(dct_features, index=chars)
    dct_features.columns = ['dct_{}'.format(c) for c in dct_features.columns]

    # Combine DCT and other features
    all_features = pd.concat([features, dct_features], axis=1)
    # Make sure the index keeps its name even though only one of the two
    # concatenated frames carries it.
    all_features.index.name = 'char_name'
    return all_features
In [28]:
from IPython.display import display
from ipywidgets import FloatProgress
# os.listdir returns entries in arbitrary order; sort them so the rows of the
# resulting table come out in a reproducible order.
font_files = [os.path.join(image_dir, f) for f in sorted(os.listdir(image_dir))]

# Start at 0 and advance once per processed file, so the bar is full exactly
# when the last file is done.
prog = FloatProgress(min=0, max=len(font_files), description='Extracting features...')
display(prog)

chars = ('zero', 'one', 'two', 'three', 'four',
         'five', 'six', 'seven', 'eight', 'nine', 'comma')

# Collect one feature table per font, then stack them into a single table.
per_font_features = []
for font_file in font_files:
    per_font_features.append(extract_features(font_file, chars))
    prog.value += 1

prog.bar_style = 'success'
all_features = pd.concat(per_font_features, axis=0)
In [29]:
# Summary of the combined feature table: columns, dtypes and non-null counts.
all_features.info()
In [30]:
# Count, per numeric column, how many values are finite (neither NaN nor inf).
num_values = all_features.drop('font_name', axis=1)
finite_flags = np.isfinite(num_values)
finite_flags.sum(axis=0)
Out[30]:
In [31]:
# This is only necessary if on_pixel_x_y_corr is included.
corr_col = 'on_pixel_x_y_corr'
if corr_col in all_features.keys():
    invalid_corr = ~np.isfinite(all_features[corr_col])

    # For 'comma' and 'four' rows, replace non-finite correlations with the
    # mean correlation of that character over all rows.
    for char_name in ('comma', 'four'):
        char_mean = all_features.loc[char_name, corr_col].mean()
        invalid_char_idx = (all_features.index == char_name) & invalid_corr
        all_features.loc[invalid_char_idx, corr_col] = char_mean
In [32]:
# Write the combined feature table to char_features.csv in the working directory.
all_features.to_csv('char_features.csv')