In [95]:
%matplotlib inline

import glob
import numpy as np
import matplotlib.pyplot as plt
import pytesseract
import PIL.Image
from toolz.curried import map, pipe, compose, get, do, curry, pluck, count
import pandas
from skimage.measure import label, regionprops
from skimage.filters import threshold_otsu

def fcompose(*args):
    """Compose functions in reading order: ``fcompose(f, g)(x)`` is ``g(f(x))``.

    ``compose`` applies the last function it is given first, so the functions
    are handed over in reverse to make the first one listed run first.
    """
    return compose(*reversed(args))

In [206]:
def reshape(arr):
    """Return ``arr`` as is when it is 2-D, otherwise index 0 of its last axis."""
    if len(arr.shape) != 2:
        return arr[..., 0]
    return arr


def to_array(image):
    """Convert ``image`` to mode ``"L"`` and return its pixels as a NumPy array.

    The array is passed through ``reshape`` so that a 2-D result comes back
    even if the conversion were to yield an extra trailing axis.
    """
    grey = image.convert("L")
    return reshape(np.asarray(grey))

def plt_arrays(arrs):
    """Show every array in ``arrs`` as a greyscale image on one square grid.

    The grid has ``ceil(sqrt(len(arrs)))`` rows and columns, i.e. the smallest
    square layout with room for all images. Both axes of every panel are
    hidden. ``arrs`` must support ``len()``.
    """
    figure = plt.figure(figsize=(7, 7))
    side = int(np.ceil(np.sqrt(len(arrs))))
    # Subplot positions are 1-based, hence start=1.
    for position, arr in enumerate(arrs, start=1):
        panel = figure.add_subplot(side, side, position)
        shown = panel.imshow(arr, cmap='Greys_r', interpolation='none')
        for axis in (shown.axes.get_xaxis(), shown.axes.get_yaxis()):
            axis.set_visible(False)
    plt.tight_layout()
    plt.show()

# Load every TIFF under data/ in sorted filename order, convert each one to a
# greyscale array and show them all together in a single grid.
# plt_arrays has no return statement, so `out` ends up as None.
out = plt_arrays(
    [to_array(PIL.Image.open(path)) for path in sorted(glob.glob('data/*.tif'))]
)



In [207]:
@curry
def crop_image(image, cutoff=960):
    """Split ``image`` into two full-width crops at the vertical coordinate ``cutoff``.

    Returns a dict with the keys ``'upper'`` (from the top edge down to
    ``cutoff``) and ``'lower'`` (from ``cutoff`` down to the bottom edge).
    """
    width, height = image.size
    return {
        'upper': image.crop(box=(0, 0, width, cutoff)),
        'lower': image.crop(box=(0, cutoff, width, height)),
    }

def plt_array(arr):
    """Show a single array as a greyscale image with both axes hidden."""
    shown = plt.imshow(arr, cmap='Greys_r', interpolation='none')
    for axis in (shown.axes.get_xaxis(), shown.axes.get_yaxis()):
        axis.set_visible(False)
    plt.tight_layout()
    plt.show()

# For every TIFF under data/ (sorted by filename): take the lower crop,
# convert it to a greyscale array, plot it, and keep the array.
# `do` calls plt_array for its side effect and hands the array back unchanged.
out = [
    do(plt_array, to_array(crop_image(PIL.Image.open(path))['lower']))
    for path in sorted(glob.glob('data/*.tif'))
]



In [208]:
def repair_string(string):
    """Turn the extracted scale text into a float (used as ``scale_microns``).

    The exact text ``'mum'`` maps to ``10.0``. Any other text has every
    ``'pm'`` removed and the remainder parsed with ``float``, which raises
    ``ValueError`` if it is not numeric.
    NOTE(review): these look like OCR misreadings of the scale label; confirm.
    """
    if string == 'mum':
        return float('10')
    return float(string.replace('pm', ''))

def scale_pixels(image):
    """Return the bounding-box width, in pixels, of the second labelled region.

    The image is converted to a greyscale array and its connected regions are
    labelled with 0 treated as background. The region at index 1 of the
    ``regionprops`` result is measured.
    NOTE(review): presumably that region is the scale bar; this depends on the
    image layout, so confirm.
    """
    labelled = label(to_array(image), background=0)
    regions = regionprops(labelled)
    bbox = regions[1].bbox
    # For a 2-D image bbox is (min_row, min_col, max_row, max_col), so this
    # difference is the extent along the column axis.
    return bbox[3] - bbox[1]

def extract_strings(image):
    """OCR ``image`` and pick the scale, date and time out of the text.

    The recognised text is split on whitespace; token 1 is the scale, token 3
    the date and the last token the time. Returns a dict with the keys
    ``scale_microns`` (float), ``date`` (with every ``'-'`` removed) and
    ``time``. Raises ``IndexError`` when fewer tokens are recognised than the
    positions require.
    """
    words = pytesseract.image_to_string(image).split()
    scale_text, date_text, time_text = words[1], words[3], words[-1]
    return dict(
        scale_microns=repair_string(scale_text),
        date=date_text.replace('-', ''),
        time=time_text,
    )

def extract_metadata(filename):
    """Extract the scale-bar width and the OCR'd text fields from one image file.

    Opens ``filename``, takes the ``'lower'`` crop produced by ``crop_image``
    and returns a dict holding ``scale_pixels`` together with every key
    returned by ``extract_strings``.
    """
    # Open the file in a ``with`` block so its handle is closed as soon as the
    # metadata has been read, instead of staying open until garbage collection.
    # All work on the crop happens inside the block, so the pixel data is
    # still available while it is being used.
    with PIL.Image.open(filename) as image:
        lower = crop_image(image)['lower']
        return dict(scale_pixels=scale_pixels(lower), **extract_strings(lower))

# Build one metadata record per TIFF under data/ (sorted by filename) and
# collect the records into a single table.
out = pandas.DataFrame([
    dict(filename=filename, **extract_metadata(filename))
    for filename in sorted(glob.glob('data/*.tif'))
])

print(out)


        date                            filename  scale_microns  scale_pixels  \
0  6/17/2015   data/1045_Steel_Nital-etch-01.tif           10.0           107   
1  6/17/2015   data/1045_Steel_Nital-etch-02.tif           10.0           161   
2  6/17/2015   data/1045_Steel_Nital-etch-03.tif           10.0           267   
3  6/17/2015   data/1045_Steel_Nital-etch-04.tif            1.0            80   
4  6/17/2015   data/1045_Steel_Nital-etch-05.tif            1.0            54   
5  6/17/2015   data/1045_Steel_Nital-etch-06.tif           10.0           161   
6  6/17/2015   data/1045_Steel_Nital-etch-07.tif           10.0           267   
7  9/11/2015   data/20150911_1045_Nital_etch.tif           10.0            54   
8  9/11/2015  data/20150911_1045_Nital_etch2.tif           10.0            54   

       time  
0  13:16:51  
1  13:20:17  
2  13:42:26  
3  13:52:11  
4  14:04:46  
5  14:06:17  
6  14:12:37  
7  10:08:55  
8  11:10:14  

In [209]:
# (Stray "-" removed: a lone minus sign is a SyntaxError, so this cell stopped Restart & Run All.)



In [1]:
from typing import NewType

In [2]:
# UserId is a distinct name for int that static type checkers treat as its own type.
UserId = NewType('UserId', int)

In [3]:
# Mark a plain int as a UserId.
some_id = UserId(444444)

In [4]:
# Displays as an ordinary int: the recorded output is just 444444.
some_id


Out[4]:
444444

In [5]:
def get_user_name(user_id: UserId) -> str:
    """Return the user name for ``user_id``.

    Placeholder implementation: ``user_id`` is never read and the result is
    always ``'daniel'``.
    """
    return 'daniel'

In [6]:
# -1 is a plain int, not a UserId, yet the call succeeds: the annotation is not checked at runtime.
get_user_name(-1)


Out[6]:
'daniel'

In [7]:
# A str is accepted as well: nothing validates the argument against the annotation when the call runs.
get_user_name('blah')


Out[7]:
'daniel'

In [ ]: