In [8]:
# Copyright 2014-2017 Bert Carremans
# Author: Bert Carremans <bertcarremans.be>
#
# License: BSD 3 clause
from flickrapi import FlickrAPI # https://pypi.python.org/pypi/flickrapi
import urllib.request
import os
import config
from random import randint
import time
def download_flickr_photos(keywords, size='original', max_nb_img=-1):
    """
    Downloads images from Flickr based on a keyword search.

    Parameters
    ----------
    keywords : string or list of strings
        Keyword to search for, or a list of keywords.
    size : string, one of 'thumbnail', 'square', 'medium' or 'original', default: 'original'
        Size of the image to download. This function only exposes these
        four options. More options are explained at
        http://librdf.org/flickcurl/api/flickcurl-searching-search-extras.html
    max_nb_img : int, default: -1
        Maximum number of images to download per keyword. With a value of -1,
        all images found are downloaded.

    Returns
    -------
    The images found for each keyword are saved in a separate subfolder.

    Notes
    -----
    This function uses the Python package flickrapi and its walk method.
    FlickrAPI.walk takes the same parameters as FlickrAPI.search:
    http://www.flickr.com/services/api/flickr.photos.search.html
    To use the Flickr API, a set of API keys needs to be created on
    https://www.flickr.com/services/api/misc.api_keys.html
    """
    if not (isinstance(keywords, str) or isinstance(keywords, list)):
        raise AttributeError('keywords must be a string or a list of strings')
    if size not in ['thumbnail', 'square', 'medium', 'original']:
        raise AttributeError('size must be "thumbnail", "square", "medium" or "original"')
    if not (max_nb_img == -1 or (max_nb_img > 0 and isinstance(max_nb_img, int))):
        raise AttributeError('max_nb_img must be an integer greater than zero or equal to -1')

    flickr = FlickrAPI(config.API_KEY, config.API_SECRET)

    if isinstance(keywords, str):
        keywords_list = [keywords]
    else:
        keywords_list = keywords

    # Map the requested size to the corresponding 'extras' field of the Flickr API
    if size == 'thumbnail':
        size_url = 'url_t'
    elif size == 'square':
        size_url = 'url_q'
    elif size == 'medium':
        size_url = 'url_c'
    elif size == 'original':
        size_url = 'url_o'
    for keyword in keywords_list:
        count = 0
        #print('Downloading images for', keyword)

        # Save the images for each keyword in a separate subfolder
        results_folder = config.IMG_FOLDER + keyword.replace(" ", "_") + "/"
        if not os.path.exists(results_folder):
            os.makedirs(results_folder)

        photos = flickr.walk(
            text=keyword,
            extras=size_url,
            license='1,2,4,5',  # restrict results to a set of Creative Commons licenses
            per_page=50)

        urls = []
        for photo in photos:
            # Sleep for a random interval to avoid hammering the Flickr API
            t = randint(1, 3)
            time.sleep(t)

            if max_nb_img != -1 and count >= max_nb_img:
                print('Reached maximum number of images to download')
                break
            count += 1

            try:
                url = photo.get(size_url)
                urls.append(url)
                urllib.request.urlretrieve(url, results_folder + str(count) + ".jpg")
                print('Downloading image #' + str(count) + ' from url ' + url)
            except Exception as e:
                print(e, 'Download failure')

        print("Total images downloaded:", str(count))