In this notebook, we

  • load up the saved .png files.
  • read the image into a numpy array.
  • partition the image into individual arrays (recall that each image is '012345689') representing each number.
  • resize each digit image into 16*16

The features are the eigenvectors for each image array. Thus, if we have n images in our training set, this process produces an n * 16 feature matrix.


In [1]:
%pylab inline
pylab.style.use('ggplot')

import numpy as np
import pandas as pd
import cv2
import os


Populating the interactive namespace from numpy and matplotlib

In [2]:
image_dir = os.path.join(os.getcwd(), 'font_images')

if not os.path.isdir(image_dir) or len(os.listdir(image_dir)) == 0:
    print('no images found in {}'.format(image_dir))

First, we outline the processing for a single image.


In [3]:
img_mat = cv2.imread(os.path.join(image_dir, 'arial.png'))

# Convert to grayscale
gs = cv2.cvtColor(img_mat, cv2.COLOR_BGR2GRAY)

In [4]:
gs.shape


Out[4]:
(124, 911)

In [5]:
pylab.imshow(gs, cmap='gray')
pylab.tick_params(
        axis='both',          # changes apply to the x-axis and y-axis
        which='both',      # both major and minor ticks are affected
        bottom='off', top='off', left='off', right='off', # don't display ticks
        labelbottom='off', labeltop='off', labelleft='off', labelright='off' # don't display ticklabels
)



In [6]:
# Partition the columns into 10 equal parts
split_positions = np.linspace(0, gs.shape[1], num=12).astype(np.int)
split_positions = split_positions[1:-1]

# manual tweak by inspection
split_positions[0] += 10 

split_positions


Out[6]:
array([ 92, 165, 248, 331, 414, 496, 579, 662, 745, 828])

In [7]:
parts = np.array_split(gs, split_positions, axis=1)

fig, axes = pylab.subplots(1, len(parts))

for part, ax in zip(parts, axes):
    ax.imshow(part, cmap='gray')
    ax.tick_params(
        axis='both',          # changes apply to the x-axis and y-axis
        which='both',      # both major and minor ticks are affected
        bottom='off', top='off', left='off', right='off', # don't display ticks
        labelbottom='off', labeltop='off', labelleft='off', labelright='off' # don't display ticklabels
)



In [8]:
fig, axes = pylab.subplots(1, len(parts))

binarized = []

for ax, p in zip(axes, parts):
    resized = cv2.resize(p, (32, 32))
    _, bin_img = cv2.threshold(resized, 127, 255, cv2.THRESH_BINARY)
    
    binarized.append(bin_img)
    
    ax.imshow(bin_img, cmap='gray')
    ax.tick_params(
        axis='both',          # changes apply to the x-axis and y-axis
        which='both',      # both major and minor ticks are affected
        bottom='off', top='off', left='off', right='off', # don't display ticks
        labelbottom='off', labeltop='off', labelleft='off', labelright='off' # don't display ticklabels
    )


Now we're ready to build image features. Let's take one of the images and work out the feature extraction process.

Statistical Features

Fraction of On Pixels


In [9]:
def calc_on_pixel_fraction(part_img):
    # Note that on pixel == 0, off pixel == 255
    _, counts = np.unique(part_img, return_counts=True)
    return counts[0] / counts[1]

on_pixel_fractions = [calc_on_pixel_fraction(p) for p in binarized]
on_pixel_fractions = pd.Series(on_pixel_fractions, index=list('0123456789,'))
on_pixel_fractions.plot(kind='bar', title='On pixel fractions for all chars')


Out[9]:
<matplotlib.axes._subplots.AxesSubplot at 0x1d0616a36a0>

Mean x, y Positions of All On Pixels


In [10]:
# Again, note that on pixel == 0, off pixel == 255
def calc_f_on_pixel_pos(part_img, f, axis=0):
    assert axis in (0, 1)
    on_x, on_y = np.where(part_img==0)
    on_dim = on_x if axis == 0 else on_y    
    return f(on_dim)

m_x = [calc_f_on_pixel_pos(p, np.mean, axis=0) for p in binarized]
m_y = [calc_f_on_pixel_pos(p, np.mean, axis=1) for p in binarized]

mean_on_pixel_xy = pd.DataFrame(np.column_stack([m_x, m_y]), 
                                index=list('0123456789,'), 
                                columns=['mean_x', 'mean_y'])

mean_on_pixel_xy.plot(kind='bar', subplots=True)


Out[10]:
array([<matplotlib.axes._subplots.AxesSubplot object at 0x000001D06176F710>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x000001D0617C0B00>], dtype=object)

Variance of x-y Positions of All on Pixels


In [11]:
v_x = [calc_f_on_pixel_pos(p, np.var, axis=0) for p in binarized]
v_y = [calc_f_on_pixel_pos(p, np.var, axis=1) for p in binarized]

var_on_pixel_xy = pd.DataFrame(np.column_stack([v_x, v_y]), 
                                index=list('0123456789,'), 
                                columns=['var_x', 'var_y'])

var_on_pixel_xy.plot(kind='bar', subplots=True)


Out[11]:
array([<matplotlib.axes._subplots.AxesSubplot object at 0x000001D061830C50>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x000001D06192DDD8>], dtype=object)

Correlation of x-y positions of All Pixels


In [12]:
def calc_on_pixel_x_y_corr(part_img):
    coef = np.corrcoef(np.where(part_img == 0))
    return coef[1, 0]

x_y_corrs = [calc_on_pixel_x_y_corr(p) for p in binarized]
x_y_corrs = pd.Series(x_y_corrs, index=list('0123456789,'))

x_y_corrs.plot(kind='bar')


Out[12]:
<matplotlib.axes._subplots.AxesSubplot at 0x1d061a3b3c8>

Note: I decided to not use this feature after adding the moment based features.

Moment Based Features

Moment calculation in OpenCV is described here:

http://docs.opencv.org/2.4/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html


In [13]:
def calc_moments(part_img):
    moments = cv2.moments(part_img, binaryImage=True)
    return moments

m_list = [calc_moments(p) for p in binarized]

m_df = pd.DataFrame.from_records(m_list)

chars = ('zero', 'one', 'two', 'three', 'four', 
         'five', 'six', 'seven', 'eight', 'nine', 'comma')

m_df.index = chars

In [14]:
m_df.head()


Out[14]:
m00 m01 m02 m03 m10 m11 m12 m20 m21 m30 ... mu20 mu21 mu30 nu02 nu03 nu11 nu12 nu20 nu21 nu30
zero 989.0 15335.0 324701.0 7728947.0 15086.0 233962.0 4973130.0 315390.0 4891614.0 7457846.0 ... 85271.298281 -60.956806 45531.387526 0.088868 -0.000043 0.000046 0.000611 0.087179 -0.000002 0.001480
one 997.0 15479.0 327351.0 7778453.0 15254.0 236986.0 5028816.0 319116.0 4958184.0 7545362.0 ... 85731.329990 -1136.800230 39557.707789 0.087555 -0.000199 0.000160 0.000492 0.086248 -0.000036 0.001260
two 976.0 15109.0 320537.0 7649701.0 14783.0 228761.0 4878331.0 308241.0 4770291.0 7287251.0 ... 84330.048156 1213.282613 63860.726667 0.090955 0.000171 -0.000092 0.000875 0.088528 0.000041 0.002146
three 986.0 15283.0 323773.0 7712023.0 15066.0 233506.0 4963800.0 315928.0 4896124.0 7491924.0 ... 85720.742394 -240.482759 44957.815617 0.089371 0.000002 -0.000017 0.000560 0.088172 -0.000008 0.001473
four 986.0 15288.0 324094.0 7723506.0 15150.0 234929.0 4991217.0 319376.0 4952401.0 7599804.0 ... 86594.559838 -381.630424 31485.984703 0.089542 -0.000036 0.000028 0.000348 0.089071 -0.000013 0.001031

5 rows × 24 columns


In [15]:
figure, axes = pylab.subplots(8, 3, figsize=(20, 24))

moment_cols = m_df.columns.values.reshape(8, 3)

for i, row in enumerate(moment_cols):
    for j, col in enumerate(row):    
        m_df.loc[:, col].plot(kind='bar', title=col, ax=axes[i][j])

pylab.tight_layout()


So, among all the moments, we choose the normalized moments: nu03, nu11 ('en-eu-one-one'), and nu12. All the other features are have similar shapes across the character classes.

DCT Based Features


In [26]:
from scipy.fftpack import dct

def calc_dct2d_zigzagged_coeffs(part_img, n_diags=3):
    dct_result = dct(dct(part_img, norm='ortho').T, norm='ortho')
    # To make a feature vector out of the DCT results by taking the elements 
    # of dct_result in a zigzagged fashion.
    # We can access these efficiently
    # by taking the mirror image and accessing the diagonals.
    mirrored = np.fliplr(dct_result)
    
    idx_first = mirrored.shape[0] - 1
    idx_last = idx_first - n_diags
            
    zigzagged_coeffs = np.concatenate([np.diag(mirrored, k) 
        for k in range(idx_first, idx_last, -1)])
    
    return zigzagged_coeffs

diag_var_dct = [calc_dct2d_zigzagged_coeffs(p, n_diags=3) for p in binarized]

dct_df = pd.DataFrame.from_records(diag_var_dct, index=chars)
dct_df.plot(kind='bar', subplots=True, figsize=(10, 20))


Out[26]:
array([<matplotlib.axes._subplots.AxesSubplot object at 0x000001D0634DA0B8>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x000001D064BECBA8>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x000001D064C00B38>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x000001D064C824E0>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x000001D064C97FD0>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x000001D064D156D8>], dtype=object)

Putting it Together


In [17]:
def partition_image(img_file, n_chars, size=32, threshold=127):
    """
    * Read the RGB image `img_file` 
    * Convert to grayscale
    * Split into one subarray per character
    * Resize to `size * size`
    * Binarize with threshold `threshold`
    
    Return a list of subarrays for each character.
    """
    assert os.path.isfile(img_file)
    
    img_mat = cv2.imread(img_file)    
    gs = cv2.cvtColor(img_mat, cv2.COLOR_BGR2GRAY)
    
    split_positions = np.linspace(0, gs.shape[1], num=n_chars+1).astype(np.int)
    split_positions = split_positions[1:-1]
    
    # manual tweak by inspection
    split_positions[0] += 10 
    
    parts = np.array_split(gs, split_positions, axis=1)
    
    resized_images = []
    
    for p in parts:
        p_new = cv2.resize(p, (size, size))
        _, bin_img = cv2.threshold(p_new, threshold, 255, cv2.THRESH_BINARY)
        resized_images.append(bin_img)
        
    return resized_images

In [18]:
from functools import partial

def calc_on_pixel_fraction(part_img):
    _, counts = np.unique(part_img, return_counts=True)
    return counts[0] /counts[1]

def calc_f_on_pixel_pos(part_img, f, axis=0):
    assert axis in (0, 1)
    on_x, on_y = np.where(part_img==0)
    on_dim = on_x if axis == 0 else on_y    
    return f(on_dim)

def calc_on_pixel_x_y_corr(part_img):
    coef = np.corrcoef(np.where(part_img == 0))
    return coef[0, 1]

def calc_moments(part_img, moments_to_keep={'nu03', 'nu11', 'nu12'}):
    moments = cv2.moments(part_img, binaryImage=True)
    return {k: v for k, v in moments.items() if k in moments_to_keep}

In [27]:
from scipy.fftpack import dct

def calc_dct2d_zigzagged_coeffs(part_img, n_diags=3):
    """Return a 1D numpy array with the zigzagged 2D DCT coefficients."""
    
    dct_result = dct(dct(part_img, norm='ortho').T, norm='ortho')    
    mirrored = np.fliplr(dct_result)
    
    idx_first = mirrored.shape[0] - 1
    idx_last = idx_first - n_diags
            
    zigzagged_coeffs = np.concatenate([np.diag(mirrored, k) 
        for k in range(idx_first, idx_last, -1)])
    
    return zigzagged_coeffs
    
# dictionary of functions 
feature_calc = {
    'on_pixel_frac': calc_on_pixel_fraction,
    # 'on_pixel_x_mean': partial(calc_f_on_pixel_pos, f=np.mean, axis=0),
    # 'on_pixel_y_mean': partial(calc_f_on_pixel_pos, f=np.mean, axis=1),
    'on_pixel_x_var': partial(calc_f_on_pixel_pos, f=np.var, axis=0),
    'on_pixel_y_var': partial(calc_f_on_pixel_pos, f=np.var, axis=1),
    # 'on_pixel_x_y_corr': calc_on_pixel_x_y_corr,
}

def extract_features(img_file, chars):
    """
    Extract_features for a combined image. Returns a DataFrame with 1 row per character.
    """
    char_images = partition_image(img_file, len(chars))
    font_name = os.path.basename(img_file).split('.')[0]
    
    features = []
    
    for char_img in char_images:
        feature_vals = {fname: fgen(char_img) for fname, fgen in feature_calc.items()}
        
        # Calculate the moment feature values separately and  update feature_vals.
        moment_features = calc_moments(char_img)
        feature_vals.update(moment_features)        
        features.append(feature_vals)
        
    features = pd.DataFrame.from_records(features, index=chars)
    features.index.name = 'char_name'
    features['font_name'] = font_name
    
    # Include the DCT features
    dct_features = [calc_dct2d_zigzagged_coeffs(p) for p in char_images]
    dct_features = pd.DataFrame.from_records(diag_var_dct, index=chars)
    dct_features.columns = ['dct_{}'.format(c) for c in dct_features.columns]
    
    # Combine DCT and other features
    all_features = pd.concat([features, dct_features], axis=1)
    
    return all_features

In [28]:
from IPython.display import display
from ipywidgets import FloatProgress

font_files = [os.path.join(image_dir, f) for f in os.listdir(image_dir)]
prog = FloatProgress(min=1, max=len(font_files), description='Extracting features...')
display(prog)

all_features = []

chars = ('zero', 'one', 'two', 'three', 'four', 
         'five', 'six', 'seven', 'eight', 'nine', 'comma')

for font_file in font_files:
    feature_df = extract_features(font_file, chars)
    all_features.append(feature_df)    
    prog.value += 1
    
prog.bar_style = 'success'
    
all_features = pd.concat(all_features, axis=0)



In [29]:
all_features.info()


<class 'pandas.core.frame.DataFrame'>
Index: 1496 entries, zero to comma
Data columns (total 13 columns):
nu03              1496 non-null float64
nu11              1496 non-null float64
nu12              1496 non-null float64
on_pixel_frac     1496 non-null float64
on_pixel_x_var    1496 non-null float64
on_pixel_y_var    1496 non-null float64
font_name         1496 non-null object
dct_0             1496 non-null float64
dct_1             1496 non-null float64
dct_2             1496 non-null float64
dct_3             1496 non-null float64
dct_4             1496 non-null float64
dct_5             1496 non-null float64
dtypes: float64(12), object(1)
memory usage: 163.6+ KB

In [30]:
num_values = all_features.drop('font_name', axis=1)
np.isfinite(num_values).sum(axis=0)


Out[30]:
nu03              1496
nu11              1496
nu12              1496
on_pixel_frac     1496
on_pixel_x_var    1496
on_pixel_y_var    1496
dct_0             1496
dct_1             1496
dct_2             1496
dct_3             1496
dct_4             1496
dct_5             1496
dtype: int64

In [31]:
# This is only necessary if on_pixel_x_y_corr is included.

if 'on_pixel_x_y_corr' in all_features.keys():
    invalid_corr = ~np.isfinite(all_features['on_pixel_x_y_corr'])
    all_features.loc[invalid_corr, 'on_pixel_x_y_corr']

    comma_mean = all_features.loc['comma', 'on_pixel_x_y_corr'].mean()
    four_mean = all_features.loc['four', 'on_pixel_x_y_corr'].mean()

    invalid_comma_idx = (all_features.index == 'comma') & invalid_corr
    all_features.loc[invalid_comma_idx, 'on_pixel_x_y_corr'] = comma_mean

    invalid_four_idx = (all_features.index == 'four') & invalid_corr
    all_features.loc[invalid_four_idx, 'on_pixel_x_y_corr'] = four_mean

In [32]:
all_features.to_csv('char_features.csv')