In [1]:
from PIL import Image
import numpy as np
In [2]:
def average_hash(fname, size=16):
    """Compute the average hash (aHash) of an image.

    The image is converted to grayscale, resized to (size, size), and every
    pixel is compared against the mean brightness.

    Parameters:
        fname: path to the image file (anything PIL.Image.open accepts).
        size: edge length of the hash grid (default 16 -> 256-bit hash).

    Returns:
        (size, size) numpy array of 0/1 ints: 1 where pixel > mean.
    """
    img = Image.open(fname)
    # 'L' = 8-bit grayscale ('1' would binarize; RGB, RGBA, CMYK etc. are
    # also supported modes).
    img = img.convert('L')
    # Image.LANCZOS replaces Image.ANTIALIAS, which was deprecated in
    # Pillow 9.1 and removed in Pillow 10.
    img = img.resize((size, size), Image.LANCZOS)
    pixel_data = img.getdata()
    pixels = np.array(pixel_data)
    pixels = pixels.reshape((size, size))
    avg = pixels.mean()
    diff = 1 * (pixels > avg)
    return diff
In [8]:
def np2hash(ahash):
    """Encode a 0/1 hash array as a hex string, 4 hex digits per row.

    Each row of `ahash` is read as a big-endian bit string and formatted
    as a zero-padded 4-digit hex chunk; chunks are concatenated in row
    order.
    """
    rows = ahash.tolist()
    return ''.join(
        '%04x' % int(''.join(map(str, row)), 2)
        for row in rows
    )
In [9]:
ahash = average_hash('eiffel_tower.jpeg')  # 16x16 average hash of the sample image
In [10]:
print(ahash)  # raw 0/1 matrix
In [11]:
print(np2hash(ahash))  # compact 64-hex-digit representation of the same hash
In [12]:
import os, re
In [18]:
# Root of the image collection to search, and where cached hashes live.
search_dir = "./image/101_ObjectCategories/"
cache_dir = "./image/cache_avhash"

# os.makedirs with exist_ok=True also creates the missing "./image" parent
# (os.mkdir would raise FileNotFoundError there) and avoids the race between
# the exists() check and the mkdir() call.
os.makedirs(cache_dir, exist_ok=True)
def average_hash(fname, size=16):
    """Return the average hash of an image as a (size, size) 0/1 int array.

    Results are cached as CSV files under `cache_dir`, so repeated runs
    over the same collection skip the decode/resize work.

    NOTE(review): this redefines the `average_hash` from the earlier cell
    (duplicate definition in the notebook); this cached version shadows it.
    """
    fname2 = fname[len(search_dir):]
    # Cache file name: flatten the file's relative path into one file name.
    cache_file = cache_dir + "/" + fname2.replace('/', '_') + '.csv'
    if not os.path.exists(cache_file):
        img = Image.open(fname)
        # Image.LANCZOS replaces Image.ANTIALIAS (removed in Pillow 10).
        img = img.convert('L').resize((size, size), Image.LANCZOS)
        pixels = np.array(img.getdata()).reshape((size, size))
        avg = pixels.mean()
        px = 1 * (pixels > avg)
        np.savetxt(cache_file, px, fmt="%.0f", delimiter=",")
    else:
        # astype(int) keeps the cached result consistent with the freshly
        # computed integer hash (np.loadtxt returns a float array).
        px = np.loadtxt(cache_file, delimiter=",").astype(int)
    return px
def hamming_dist(a, b):
    """Count the positions where two equal-sized hash arrays differ."""
    flat_a = a.ravel()
    flat_b = b.ravel()
    return (flat_a != flat_b).sum()
def enum_all_files(path):
    """Recursively yield paths of image files (.jpg/.jpeg/.png) under path.

    Bug fix: the original extension pattern contained 'pnp' instead of
    'png', so PNG files were never matched.
    """
    for root, dirs, files in os.walk(path):
        for f in files:
            fname = os.path.join(root, f)
            if re.search(r'\.(jpg|jpeg|png)$', fname):
                yield fname
def find_image(fname, rate):
    """Yield (difference_ratio, path) for images in `search_dir` whose
    average-hash distance to `fname` is below `rate`.

    The ratio is normalized by the total number of hash bits (src.size),
    so it works for any hash size, not only the default 16x16 = 256 bits
    that the original hardcoded.
    """
    src = average_hash(fname)
    # Renamed loop variable: the original reused `fname` and shadowed the
    # parameter, which made the code harder to follow.
    for path in enum_all_files(search_dir):
        dst = average_hash(path)
        diff_r = hamming_dist(src, dst) / src.size
        if diff_r < rate:
            yield (diff_r, path)
# Find images similar to one chair photo and write an HTML report.
srcfile = search_dir + "/chair/image_0016.jpg"
html = ""
sim = list(find_image(srcfile, 0.25))
# Sort by difference ratio, most similar first.
sim = sorted(sim, key=lambda x: x[0])
for r, f in sim:
    print(r, ">", f)
    # Bug fix: 'herf' -> 'href' so the links actually work.
    s = '<div style="float:left;"><h3>[ 차이 : ' + str(r) + '-' + os.path.basename(f) + ']</h3>' + \
        '<p><a href="' + f + '"><img src="' + f + '" width=400>' + '</a></p></div>'
    html += s
# Bug fix: '/head>' -> '</head>' so the document head is closed properly.
html = """<html><head><meta charset="utf8"></head>
<body><h3> 원래 이미지 </h3><p>
<img src = '{0}' width=400></p>{1}</body></html>""".format(srcfile, html)
# NOTE(review): output file name says "avgash" (likely a typo for "avhash");
# kept as-is because other tooling may already reference this name.
with open("./avgash-search-output.html", "w", encoding="utf-8") as f:
    f.write(html)
print("ok")
In [ ]:
from PIL import Image
import os, glob
import numpy as np
from sklearn.model_selection import train_test_split
# Select the categories to classify
caltech_dir