In [95]:
%matplotlib inline
import glob
import numpy as np
import matplotlib.pyplot as plt
import pytesseract
import PIL.Image
from toolz.curried import map, pipe, compose, get, do, curry, pluck, count
import pandas
from skimage.measure import label, regionprops
from skimage.filters import threshold_otsu
def fcompose(*args):
    """Compose ``args`` so they are applied in the order given.

    ``fcompose(f, g)(x)`` evaluates ``g(f(x))``: the arguments are reversed
    before being handed to toolz ``compose``, which applies its last
    argument first.
    """
    return compose(*reversed(args))
In [206]:
def reshape(arr):
    """Return ``arr`` unchanged when it is 2-D, otherwise its ``[..., 0]`` slice.

    For an array with a trailing channel axis this keeps only the first
    channel.
    """
    if len(arr.shape) == 2:
        return arr
    return arr[..., 0]
def to_array(image):
    """Convert ``image`` to mode ``"L"`` and return it as a 2-D array.

    ``image`` must provide ``convert`` (the cells below pass PIL images).
    The converted image is wrapped with ``np.asarray`` and passed through
    ``reshape`` so the result is always two-dimensional.
    """
    greyscale = image.convert("L")
    return reshape(np.asarray(greyscale))
def plt_arrays(arrs):
    """Show every array in ``arrs`` as one panel of a single 7x7-inch figure.

    Panels are laid out on the smallest square grid that holds
    ``len(arrs)`` images, drawn with the reversed grey colormap and no
    interpolation, with both axes hidden. Returns ``None``.
    """
    figure = plt.figure(figsize=(7, 7))
    # Smallest grid_size such that grid_size * grid_size >= len(arrs).
    grid_size = int(np.ceil(np.sqrt(len(arrs))))
    for position, panel in enumerate(arrs, start=1):
        axes = figure.add_subplot(grid_size, grid_size, position)
        axes.imshow(panel, cmap='Greys_r', interpolation='none')
        axes.get_xaxis().set_visible(False)
        axes.get_yaxis().set_visible(False)
    plt.tight_layout()
    plt.show()
# Load every TIFF under data/ (sorted by path), convert each to a greyscale
# array and show them all in one grid figure. plt_arrays returns None, so
# `out` is None after this cell.
out = plt_arrays(
    [to_array(PIL.Image.open(path)) for path in sorted(glob.glob('data/*.tif'))]
)
In [207]:
@curry
def crop_image(image, cutoff=960):
    """Split ``image`` at row ``cutoff`` into two full-width crops.

    Returns a dict with ``'upper'`` (rows ``0`` to ``cutoff``) and
    ``'lower'`` (rows ``cutoff`` to the image height). ``image`` must
    provide ``size`` and ``crop`` as PIL images do; the crop box order is
    (left, upper, right, lower).
    """
    width, height = image.size[0], image.size[1]
    upper_band = image.crop(box=(0, 0, width, cutoff))
    lower_band = image.crop(box=(0, cutoff, width, height))
    return {'upper': upper_band, 'lower': lower_band}
def plt_array(arr):
    """Show a single array as a greyscale image with both axes hidden.

    Uses the reversed grey colormap and no interpolation. Returns ``None``.
    """
    # plt.imshow returns the image artist; `.axes` is the Axes it was drawn on.
    host_axes = plt.imshow(arr, cmap='Greys_r', interpolation='none').axes
    host_axes.get_xaxis().set_visible(False)
    host_axes.get_yaxis().set_visible(False)
    plt.tight_layout()
    plt.show()
# For each TIFF under data/ (sorted by path): crop at the default cutoff, keep
# the lower strip, convert it to a greyscale array and plot it. `do` runs
# plt_array for its side effect and hands the array back, so `out` ends up as
# the list of lower-strip arrays.
out = [
    do(plt_array, to_array(crop_image(PIL.Image.open(path))['lower']))
    for path in sorted(glob.glob('data/*.tif'))
]
In [208]:
def repair_string(string):
    """Turn a scale token into a float.

    The exact token ``'mum'`` maps to ``10.0``. For any other token every
    ``'pm'`` substring is removed before conversion. These look like
    corrections for OCR misreads of a micron label (to be confirmed).

    Raises ``ValueError`` if the cleaned token is not a valid float literal.
    """
    if string == 'mum':
        return float('10')
    return float(string.replace('pm', ''))
def scale_pixels(image):
    """Return the bounding-box width, in pixels, of the second labelled region.

    ``image`` is converted to a greyscale array, connected components are
    labelled with 0 as background, and the region at index 1 of
    ``regionprops`` is measured. That region is presumably the scale bar
    (to be confirmed against the data).

    Raises ``IndexError`` when fewer than two regions are found.
    """
    labelled = label(to_array(image), background=0)
    second_region = regionprops(labelled)[1]
    # For a 2-D image bbox is (min_row, min_col, max_row, max_col), so this is
    # the column extent.
    box = second_region.bbox
    return box[3] - box[1]
def extract_strings(image):
    """OCR ``image`` and pick the scale, date and time tokens from the text.

    The recognised text is split on whitespace. Token 1 is parsed by
    ``repair_string`` into ``scale_microns``, token 3 becomes ``date`` with
    its dashes removed, and the last token becomes ``time``.

    Raises ``IndexError`` when the OCR output has too few tokens.
    """
    tokens = pytesseract.image_to_string(image).split()
    scale_token, date_token, time_token = tokens[1], tokens[3], tokens[-1]
    return {
        'scale_microns': repair_string(scale_token),
        'date': date_token.replace('-', ''),
        'time': time_token,
    }
def extract_metadata(filename):
    """Extract scale-bar and caption metadata from one image file.

    The image is opened, split with ``crop_image`` at its default cutoff,
    and only the ``'lower'`` strip is analysed.

    Parameters
    ----------
    filename : path (or anything ``PIL.Image.open`` accepts)

    Returns
    -------
    dict
        ``scale_pixels`` (from ``scale_pixels``) plus the ``scale_microns``,
        ``date`` and ``time`` entries produced by ``extract_strings``.
    """
    # Pillow keeps the file handle open for multi-frame formats such as TIFF
    # until the image is closed. The context manager releases it, so all work
    # on the crop has to finish inside the block.
    with PIL.Image.open(filename) as image:
        lower = crop_image(image)['lower']
        return dict(scale_pixels=scale_pixels(lower), **extract_strings(lower))
# Build one metadata record per TIFF under data/ (sorted by path) and collect
# the records into a DataFrame with one row per file.
out = pandas.DataFrame([
    dict(filename=filename, **extract_metadata(filename))
    for filename in sorted(glob.glob('data/*.tif'))
])
print(out)
In [209]:
-
In [1]:
from typing import NewType
In [2]:
# NewType gives static type checkers a distinct `UserId` type backed by int.
# Per the typing docs, calling UserId(x) at runtime simply returns x.
UserId = NewType('UserId', int)
In [3]:
# Wrap a literal so type checkers treat it as a UserId rather than a bare int.
some_id = UserId(444444)
In [4]:
# Echo the value as the cell output.
some_id
Out[4]:
In [5]:
def get_user_name(user_id: UserId) -> str:
    """Return the name for ``user_id``.

    Stub for the demo: the argument is ignored and ``'daniel'`` is always
    returned.
    """
    return 'daniel'
In [6]:
# A plain int is passed where the annotation says UserId. Python does not
# enforce annotations at runtime, so only a static type checker would object.
get_user_name(-1)
Out[6]:
In [7]:
# A str is passed where the annotation says UserId. The call still runs,
# because annotations are not checked at runtime.
get_user_name('blah')
Out[7]:
In [ ]: