In [1]:
import sys
import re
# Make the local helper scripts (splitter, uploader, stitcher, blender) importable
sys.path.append('..')
from reprowd.crowdcontext import CrowdContext
from reprowd.presenter.base import BasePresenter
In [2]:
input_image_path = '../../img/lena.bmp'
n_x = 10
n_y = 10
input_splits_folder = '../../out_img/'
n_assigns = 5
output_stitched_folder = '../../stitched/'
output_blended_image_path = '../../blended.jpg'
output_blended_folder = '../../blended/'
ftp_pub_folder = 'pub_html'
ftp_divs_folder = 'img_divs'
# Project Attributes
project_long_name = 'Crowd Sketch Filter'
project_short_name = 'CSF_project'
project_description = 'Please answer some of these tasks!'
presenter_question = 'Replicate the original image as closely as you can.'
In [ ]:
from splitter import split_image
img_split_paths = split_image(input_image_path, n_x, n_y, input_splits_folder)
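splitter.py itself is not reproduced in this notebook. For reference, here is a minimal sketch of what split_image might look like, assuming PIL grid crops and file names that encode the crop box as x1_y1_x2_y2 (the pattern the gather step below relies on); the real script may differ:
# Illustrative sketch only -- the real splitter.py may differ.
import os
from PIL import Image
def split_image(image_path, n_x, n_y, out_folder):
    """Crop an image into an n_x by n_y grid and save each region."""
    img = Image.open(image_path)
    w, h = img.size
    tile_w, tile_h = w // n_x, h // n_y
    paths = []
    for x in range(n_x):
        for y in range(n_y):
            box = (x * tile_w, y * tile_h, (x + 1) * tile_w, (y + 1) * tile_h)
            # Encode the crop box in the file name, e.g. 0_0_51_51.jpeg
            name = '%d_%d_%d_%d.jpeg' % box
            p = os.path.join(out_folder, name)
            img.crop(box).save(p)
            paths.append(p)
    return paths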
The uploader script uploads all the image regions from the local folder to an FTP server.
In [ ]:
from uploader import upload_images
img_split_urls = upload_images(img_split_paths, ftp_pub_folder)
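uploader.py is likewise external. A rough sketch of the idea using the standard library's ftplib, where FTP_HOST, FTP_USER, FTP_PASS, and BASE_URL are hypothetical placeholders (the real script's credentials and URL layout will differ):
# Illustrative sketch only -- credentials and URL layout are hypothetical.
import os
from ftplib import FTP
FTP_HOST = 'ftp.example.com'
FTP_USER = 'user'
FTP_PASS = 'secret'
BASE_URL = 'http://example.com/'  # public URL that maps to the FTP folders
def upload_images(paths, remote_folder):
    """Upload local files to an FTP folder and return their public URLs."""
    ftp = FTP(FTP_HOST)
    ftp.login(FTP_USER, FTP_PASS)
    try:
        ftp.mkd(remote_folder)
    except Exception:
        pass  # the folder probably exists already
    ftp.cwd(remote_folder)
    urls = []
    for p in paths:
        name = os.path.basename(p)
        with open(p, 'rb') as f:
            ftp.storbinary('STOR ' + name, f)
        urls.append(BASE_URL + remote_folder + '/' + name)
    ftp.quit()
    return urls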
In [ ]:
cc = CrowdContext(local_db="sketch.db")
The URLs of the uploaded images are stored in the context database.
In [ ]:
crowd_data = cc.CrowdData(img_split_urls, 'image_regions')
Our custom-made presenter template is set as the current one.
In [ ]:
my_presenter = BasePresenter()
my_presenter.set_name(project_long_name)
my_presenter.set_short_name(project_short_name)
my_presenter.set_description(project_description)
my_presenter.set_question(presenter_question)
# Set the name of the project in the template
html_text = None
with open('presenter.html', 'r') as presenter_file:
    html_text = presenter_file.read()
html_text = re.sub(r"pybossa\.userProgress\('([\S]+)'\)",
                   "pybossa.userProgress('" + project_short_name + "')",
                   html_text)
html_text = re.sub(r"pybossa\.run\('([\S]+)'\)",
                   "pybossa.run('" + project_short_name + "')",
                   html_text)
# Set the template as the presenter
my_presenter.set_template(html_text)
crowd_data.set_presenter(my_presenter, lambda obj: {'url_m': obj})
The tasks are published with a predefined number of assignments per task.
In [ ]:
r = crowd_data.publish_task(n_assignments=n_assigns)
In [ ]:
crowd_data = crowd_data.get_result()
We traverse crowd_data, gathering only the information we will need later, in an organized form.
The worker drawings come back as base64 strings, so during the traversal we also decode those strings into JPEG images.
The result is the gathered_results array: a list of dicts, each holding a region's file name and one image per assignment for that file.
In [ ]:
import base64
from PIL import Image
from io import BytesIO
def gather_images():
    results = []
    for i in crowd_data.data['id']:
        result_obj = {}
        result_obj['file_url'] = crowd_data.data['object'][i]
        m = re.search(r'([0-9]+_[0-9]+_[0-9]+_[0-9]+)\.[a-zA-Z]+', result_obj['file_url'])
        result_obj['file'] = m.group(1)
        result_obj['img'] = []
        for a in crowd_data.data['result'][i]['assignments']:
            # Strip the 23-character 'data:image/jpeg;base64,' data-URI prefix,
            # then decode the remainder back into a PIL image.
            im = Image.open(BytesIO(base64.b64decode(a['worker_response'][23:])))
            result_obj['img'].append(im)
        results.append(result_obj)
    return results
gathered_results = gather_images()
The stitcher script stitches the images back together and saves them in a local folder.
In [ ]:
from stitcher import stitch_images_from_object
stitch_images_from_object(input_image_path, gathered_results, output_stitched_folder)
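stitcher.py is external as well. A minimal sketch of stitch_images_from_object, assuming the file names encode the crop box as x1_y1_x2_y2 (consistent with the regex in gather_images) and that one full-size canvas is produced per assignment index:
# Illustrative sketch only -- the real stitcher.py may differ.
import os
from PIL import Image
def stitch_images_from_object(original_path, results, out_folder):
    """Paste each region's drawings onto full-size canvases,
    one canvas per assignment index."""
    size = Image.open(original_path).size
    n = max(len(r['img']) for r in results)
    canvases = [Image.new('RGB', size, 'white') for _ in range(n)]
    for region_obj in results:
        # Recover the paste box from the x1_y1_x2_y2 file name.
        x1, y1, x2, y2 = map(int, region_obj['file'].split('_'))
        for i, img in enumerate(region_obj['img']):
            canvases[i].paste(img.resize((x2 - x1, y2 - y1)), (x1, y1))
    for i, canvas in enumerate(canvases):
        canvas.save(os.path.join(out_folder, 'stitched_%d.jpeg' % i))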
In [ ]:
from blender import blend_images_from_folder
blend_images_from_folder(output_stitched_folder, output_blended_image_path)
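blend_images_from_folder lives in blender.py; the core operation is presumably an average of the stitched canvases. A minimal sketch using PIL's Image.blend as a running average (the real script may weight or order the images differently):
# Illustrative sketch only -- the real blender.py may differ.
import os
from PIL import Image
def blend_images_from_folder(folder, out_path):
    """Average every image in a folder into one blended image."""
    paths = sorted(os.path.join(folder, f) for f in os.listdir(folder))
    blended = Image.open(paths[0])
    for i, p in enumerate(paths[1:], start=2):
        # Weight the i-th image at 1/i so the result stays a uniform average.
        blended = Image.blend(blended, Image.open(p), 1.0 / i)
    blended.save(out_path)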
We now need to filter out the worst worker assignments to try to improve the quality of the final piece.
upload_results takes the gathered_results object, saves its images into a temporary folder, uploads them to our FTP server, and adds a new field, urls, holding the URLs of the uploaded images.
In [ ]:
import os
from uploader import upload_images
import tempfile
import pickle
def upload_results(results):
    tmp_folder_path = tempfile.mkdtemp()
    for region_obj in results:
        path_list = []
        for i, img in enumerate(region_obj['img']):
            p = os.path.join(tmp_folder_path, region_obj['file'] + '_' + str(i) + '.jpeg')
            img.save(p)
            path_list.append(p)
        region_obj['urls'] = upload_images(path_list, region_obj['file'] + '_assigns')
    return results
gathered_results = upload_results(gathered_results)
# save the results so they survive a kernel restart
with open("gathered_results.p", "wb") as f:
    pickle.dump(gathered_results, f)
To remove the worst drawings, we first need to sort them from best to worst.
We have modified the quicksort method published in the reprowd GitHub repo to use a presenter in which the worker is shown three images: the original image and two user-made drawings. The worker is then asked to choose the better one. We use the workers' answers to sort the drawings.
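quicksort.py is not shown here either. Conceptually it is an ordinary quicksort whose comparator is a crowd task; a condensed sketch with a hypothetical crowd_compare helper standing in for the reprowd publish/collect round trip (the real function, called below, also takes the presenter, a map function, the number of assignments, and the CrowdContext, and sorts the list in place):
# Illustrative sketch only -- the real quicksort.py wires the comparisons
# through reprowd CrowdData tasks instead of this hypothetical helper.
def crowd_compare(original_url, left_url, right_url):
    """Publish one task showing the original plus two drawings and
    return True if the majority of workers prefers the left one."""
    raise NotImplementedError  # reprowd publish_task / get_result round trip
def quicksort_sketch(urls, original_url):
    """Return drawing URLs sorted from best to worst by crowd votes."""
    if len(urls) <= 1:
        return list(urls)
    pivot, better, worse = urls[0], [], []
    for u in urls[1:]:
        (better if crowd_compare(original_url, u, pivot) else worse).append(u)
    return (quicksort_sketch(better, original_url) + [pivot] +
            quicksort_sketch(worse, original_url))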
In [ ]:
import pickle
import pprint
# if something goes wonky, we can always load the gathered_results object back
with open("gathered_results.p", "rb") as f:
    gathered_results = pickle.load(f)
# print it out so that we can inspect the object
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(gathered_results)
In [ ]:
# Map function that the quicksort presenter will use
def sort_map_func(obj):
    return {'url_o': obj[0], 'url_l': obj[1], 'url_r': obj[2]}
filter_presenter = BasePresenter()
filter_presenter.set_name(project_long_name)
filter_presenter.set_short_name(project_short_name)
filter_presenter.set_description(project_description)
filter_presenter.set_question(presenter_question)
# Set the name of the project in the template
html_text = None
with open('sort_presenter.html', 'r') as presenter_file:
    html_text = presenter_file.read()
html_text = re.sub(r"pybossa\.userProgress\('([\S]+)'\)",
                   "pybossa.userProgress('" + project_short_name + "')",
                   html_text)
html_text = re.sub(r"pybossa\.run\('([\S]+)'\)",
                   "pybossa.run('" + project_short_name + "')",
                   html_text)
# Set the template as the presenter
filter_presenter.set_template(html_text)
from quicksort import quicksort
for region_obj in gathered_results:
    # create a copy of the url list
    url_list = list(region_obj['urls'])
    # we assume that if the file names still run 0..n in order, the list has
    # NOT been sorted yet; otherwise skip the region
    sorted_yet = False
    for index, url_item in enumerate(url_list):
        # url_item[-6] is the assignment digit right before the '.jpeg' suffix
        if url_item[-6] != str(index):
            sorted_yet = True
            break
    if sorted_yet:
        continue
    # sort the list of images (in place) with crowd comparisons
    quicksort(url_list,
              region_obj['file'],
              filter_presenter,
              sort_map_func,
              region_obj['file_url'],
              1,
              cc)
    # rearrange the url and PIL image lists inside region_obj
    sorted_PIL_list = []
    for u in url_list:
        orig_index = region_obj['urls'].index(u)
        sorted_PIL_list.append(region_obj['img'][orig_index])
    region_obj['urls'] = url_list
    region_obj['img'] = sorted_PIL_list
With each region's drawings sorted from best to worst, we remove the tail of the img arrays.
The size of the tail depends on how aggressive we want the filter to be.
In [ ]:
from stitcher import stitch_images_from_sorted_object
stitch_images_from_sorted_object(input_image_path, gathered_results, n_assigns, output_stitched_folder)
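stitch_images_from_sorted_object is again defined in stitcher.py. The filtering it adds over the earlier stitcher is presumably just a truncation of each region's sorted lists before pasting, along these lines (keep_best is a hypothetical knob derived from n_assigns and the desired aggressiveness):
# Illustrative sketch only -- keep_best is a hypothetical parameter.
def trim_results(results, keep_best):
    """Drop the tail (the worst drawings) of each region's sorted lists."""
    for region_obj in results:
        region_obj['img'] = region_obj['img'][:keep_best]
        region_obj['urls'] = region_obj['urls'][:keep_best]
    return results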
In [11]:
import os
from PIL import Image
from blender import blend_images_from_list
# a bit manual, but this lets us loop over a single folder instead of
# having to create a separate folder per blend
# sort so the best-ranked stitched images come first
file_list = sorted(os.listdir(output_stitched_folder))
path_list = [os.path.join(output_stitched_folder, fn) for fn in file_list]
img_list = [Image.open(fp, 'r') for fp in path_list]
# blend the best 2, 3, 4, and all 5 stitched images
for i in range(2, n_assigns + 1):
    working_img_list = img_list[:i]
    output_path = os.path.join(output_blended_folder,
                               '_'.join(map(str, range(len(working_img_list)))) + '.jpeg')
    blend_images_from_list(working_img_list, output_path)