In [26]:
import urllib.request
import os
import shutil
import tarfile

This is a simple script that downloads the Caltech256 image feature dataset archive and extracts it into the destination directory.


In [27]:
# Remote location of the dataset archive (a gzip-compressed tarball).
DATASET_URL = (
    "http://homes.esat.kuleuven.be/~tuytelaa/"
    "unsup/unsup_caltech256_dense_sift_1000_bow.tar.gz"
)
# Local directory that receives both the downloaded archive and its contents.
DESTINATION_DIR = "../../../projects/weiyen/data"

In [28]:
# Download the dataset archive (unless it is already on disk) and unpack it
# into DESTINATION_DIR.
filename = os.path.split(DATASET_URL)[1]
dest_path = os.path.join(DESTINATION_DIR, filename)

# The destination may not exist on a fresh checkout; create it so that both
# the download and the extraction have somewhere to write.
os.makedirs(DESTINATION_DIR, exist_ok=True)

if os.path.exists(dest_path):
    print("{} exists. Skipping download...".format(dest_path))
else:
    # Stream into a temporary ".part" file and only rename it to dest_path once
    # the transfer has finished. Otherwise an interrupted download would leave
    # a truncated archive that the existence check above mistakes for a
    # complete one on the next run.
    partial_path = dest_path + ".part"
    try:
        with urllib.request.urlopen(DATASET_URL) as response, open(partial_path, 'wb') as out_file:
            shutil.copyfileobj(response, out_file)
        os.replace(partial_path, dest_path)
    except BaseException:
        # Also covers KeyboardInterrupt (kernel interrupt): discard the
        # incomplete file, then let the original error propagate.
        if os.path.exists(partial_path):
            os.remove(partial_path)
        raise
    print("Dataset downloaded. Extracting files...")

# The context manager closes the archive even if extraction fails.
with tarfile.open(dest_path) as tar:
    if hasattr(tarfile, "data_filter"):
        # The archive comes from the network, so refuse members that would be
        # written outside DESTINATION_DIR (absolute paths, "..", unsafe links).
        tar.extractall(path=DESTINATION_DIR, filter="data")
    else:
        # Interpreter without extraction-filter support: fall back to the
        # unfiltered behaviour. NOTE(review): only safe for trusted archives.
        tar.extractall(path=DESTINATION_DIR)
print("Files extracted.")


Dataset downloaded. Extracting files...
Files extracted.

In [ ]: