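This notebook requires numpy, scipy, Pillow (PIL), OpenCV (cv2), matplotlib, IPython and requests; os, pathlib, io and sqlite3 are part of the Python standard library.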
In [ ]:
import numpy as np
from PIL import Image as PILImage
import PIL.ImageStat
from IPython.display import Image as IPyImage
import os
from pathlib import Path
import requests
from io import BytesIO
from scipy import signal
from scipy import misc
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline
import cv2
import sqlite3
In [2]:
# --- Locations on disk (raw strings keep the Windows backslashes literal) ---
SOURCE_IMAGE_DIRECTORY = r'E:\local\TestData\training-images\hjl'
THUMBNAIL_DIRECTORY = r'E:\local\TestData\training-images\thumbnail'
DATASTORE_DIRECTORY = r'E:\local\TestData\training-images'
DATASTORE_NAME = 'datastore.db'

# --- Image sizing ---
THUMBNAIL_SIZE = (128, 128)  # size tuple handed to Image.thumbnail()
STANDARD_IMAGE_DIM = 256  # not referenced by the code visible in this notebook
In [3]:
# Collect the full path of every file found anywhere below the source
# directory; the resulting list is consumed by the processing loop later on.
raw_input_queue = []
for folder, _subfolders, names in os.walk(SOURCE_IMAGE_DIRECTORY):
    for name in names:
        candidate_path = os.path.join(folder, name)
        print('queueing', candidate_path)
        raw_input_queue.append(candidate_path)

print('queued', len(raw_input_queue), 'items')
In [4]:
def make_item_record(item):
    """Return a dict of filesystem facts about the file at path *item*.

    Keys:
        size     -- result of os.path.getsize(item)
        dirname  -- directory part of the path
        filename -- final path component
        ctime    -- result of os.path.getctime(item)
        fullpath -- the path exactly as it was passed in
    """
    return {
        'size': os.path.getsize(item),
        'dirname': os.path.dirname(item),
        'filename': os.path.basename(item),
        'ctime': os.path.getctime(item),
        'fullpath': item,
    }
In [20]:
def generate_standardized_image(item):
    """Write a JPEG thumbnail for the image at *item* and describe the image.

    The thumbnail is stored in THUMBNAIL_DIRECTORY as ``<stem>_thumb.jpg`` and
    is bounded by THUMBNAIL_SIZE.

    Returns a dict with the keys:
        width, height -- dimensions of the source image (before shrinking)
        format        -- the PIL *mode* string of the source image (im.mode)
        mean          -- int average of the thumbnail's R, G and B band means
        thumbpath     -- full path of the thumbnail that was written
        thumbname     -- file name of the thumbnail

    NOTE: the thumbnail name is derived from the file stem only, so two source
    files with the same stem (in different folders or with different
    extensions) overwrite each other's thumbnail.
    """
    image_properties = dict()

    # The context manager closes the source file even if something below fails.
    with PILImage.open(item) as im:
        image_properties['width'] = im.width
        image_properties['height'] = im.height
        image_properties['format'] = im.mode

        # convert() returns a new image. Forcing RGB lets any source mode be
        # saved as JPEG and guarantees three bands for the mean computed below.
        im_thumb = im.convert('RGB')

    # thumbnail() resizes in place, so it must be called on the copy that is
    # saved -- the original code shrank `im` and saved a full-size "thumbnail".
    im_thumb.thumbnail(THUMBNAIL_SIZE)

    im_thumb_filename = Path(item).stem + '_thumb' + '.jpg'
    im_thumb_path = os.path.join(THUMBNAIL_DIRECTORY, im_thumb_filename)

    # Make sure the output directory exists before the first save.
    os.makedirs(THUMBNAIL_DIRECTORY, exist_ok=True)
    im_thumb.save(im_thumb_path, 'JPEG')

    im_stat = PIL.ImageStat.Stat(im_thumb)
    print(im_stat.mean)
    meanval = int((im_stat.mean[0] + im_stat.mean[1] + im_stat.mean[2]) / 3)
    image_properties['mean'] = meanval
    im_thumb.close()

    image_properties['thumbpath'] = im_thumb_path
    image_properties['thumbname'] = im_thumb_filename
    return image_properties
In [23]:
# Build a brand-new, empty datastore for image metadata ("datastore.db").
# A datastore file already present at that location is deleted without asking.
datastore_path = os.path.join(DATASTORE_DIRECTORY, DATASTORE_NAME)
print('datastore path is', datastore_path)

existing_store = Path(datastore_path)
if existing_store.is_file():
    print('removing existing datastore!')
    os.remove(datastore_path)

print("creating new datastore " + DATASTORE_NAME)
conn = sqlite3.connect(datastore_path)
cursor = conn.cursor()

# One table, one row per processed source image.
cursor.execute("""CREATE TABLE input_images
(
width int,
height int,
mean int,
filename text,
thumbname text)
""")
conn.commit()
In [25]:
def db_add_record(conn, width, height, mean, filename, thumbname):
    """Insert one row into the input_images table and commit it.

    conn is an open sqlite3 connection; the remaining arguments are stored,
    in this order, as the row's five column values.
    """
    row = (width, height, mean, filename, thumbname)
    cur = conn.cursor()
    cur.execute("INSERT INTO input_images VALUES (?, ?, ?, ?, ?)", row)
    conn.commit()
In [26]:
# Process every queued file: gather its file and image properties, store one
# row in the datastore, and echo the collected info.
for item in raw_input_queue:
    print('processing', item)
    item_info = {
        'source_file': make_item_record(item),
        'source_image': generate_standardized_image(item),
    }
    image_props = item_info['source_image']
    db_add_record(
        conn,
        image_props['width'],
        image_props['height'],
        image_props['mean'],
        item_info['source_file']['filename'],
        image_props['thumbname'],
    )
    print(item_info)
In [ ]:
# Open a new connection to the datastore file and rebind the notebook-level `conn` to it.
conn = sqlite3.connect(datastore_path)
In [28]:
# Close the datastore connection currently held in `conn`.
conn.close()
In [ ]:
# Manual smoke test: insert one dummy row.
# db_add_record takes (conn, width, height, mean, filename, thumbname); the
# original call left out `mean` and therefore raised TypeError. 0 is used as a
# placeholder mean value.
# NOTE: `conn` must be an open connection when this cell is run.
db_add_record(conn, 128, 128, 0, 'test.jpg', 'test_thumb.jpg')
In [29]:
# Print every row of input_images to check what has been stored.
conn = sqlite3.connect(datastore_path)
try:
    cursor = conn.cursor()
    cursor.execute('select * from input_images')
    rows = cursor.fetchall()
    for r in rows:
        print(r)
finally:
    # Release the connection even when the query fails (e.g. the table does
    # not exist yet); a lingering open handle can keep the datastore file
    # locked and block its removal when the datastore is re-created.
    conn.close()
In [30]:
# Load one sample image from the source directory with OpenCV.
# NOTE(review): cv2.imread is documented to return None instead of raising when
# the file cannot be read -- confirm the path exists before using `cvim`.
cvim = cv2.imread(r'E:\local\TestData\training-images\hjl\6988939547_d734af8034_o.jpg')
In [31]:
# Show the dimensions of the loaded array (displayed as this cell's output).
cvim.shape
Out[31]:
In [32]:
# cv2.imread delivers colour channels in BGR order while matplotlib expects
# RGB, so convert before plotting; otherwise red and blue appear swapped.
plt.imshow(cv2.cvtColor(cvim, cv2.COLOR_BGR2RGB))
# plt.show has to be *called*; the bare `plt.show` only evaluated to the
# function object and never rendered anything itself.
plt.show()
Out[32]:
In [ ]: