In [1]:
import numpy as np
import pandas as pd
In [2]:
# Pick a 14-pixel horizontal strip out of the flattened 28x28 image:
# flattened index = row * 28 + column, so this is row 13, columns 7..20.
pixel_start = 7 + 28 * 13
pixel_end = 14 + pixel_start
pixel_columns = ['pixel{}'.format(idx) for idx in range(pixel_start, pixel_end)]
# Bare last expression so the notebook displays the selected column names.
pixel_columns
Out[2]:
In [3]:
# Load the training set; every column is read as an unsigned 8-bit integer.
# NOTE(review): assumes all values (label and pixels) fit in 0..255 -- confirm.
train = pd.read_csv('../data/train.csv', dtype=np.uint8)
# Preview the first rows, restricted to the label plus the selected pixel strip.
train.head()[['label'] + pixel_columns]
Out[3]:
In [4]:
# Load the test set with the same uint8 dtype used for the training data.
# NOTE(review): assumes every column fits in 0..255 -- confirm.
test = pd.read_csv('../data/test.csv', dtype=np.uint8)
# Preview the first rows of the selected pixel strip (no label column is selected here).
test.head()[pixel_columns]
Out[4]:
In [5]:
# Group the training rows by their 'label' value; the grouped object is
# reused by the plotting cell further down.
train_grouped = train.groupby(by='label')
# Show how many training rows fall under each label.
train_grouped.size()
Out[5]:
In [6]:
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
% matplotlib inline
NUM_LABELS = 10
NUM_IMAGES_PER_GROUP = 7
fig = plt.figure(figsize=(10, 10))
gs = gridspec.GridSpec(NUM_LABELS, NUM_IMAGES_PER_GROUP)
for label in range(NUM_LABELS):
for i, row in enumerate(train_grouped.get_group(label).head(NUM_IMAGES_PER_GROUP).values):
img_data = row[1:].reshape((28, 28))
ax = plt.subplot(gs[label, i])
ax.imshow(img_data, cmap='gray')
ax.axis('off')