AWS KDE Computation

This notebook makes it easy to run computationally expensive KDE computations on AWS.

Load Data

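This section downloads the input data file from Dropbox and saves it locally. If a local copy already exists, it is reused and nothing is downloaded.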


In [22]:
from urllib import urlopen
import sys, os

def chunk_report(bytes_so_far, chunk_size, total_size):
    """Write a single-line download progress indicator to stdout.

    The line ends with a carriage return so successive calls overwrite each
    other; a newline is written once the download has reached its total.

    Args:
        bytes_so_far: Number of bytes received so far.
        chunk_size: Size of each read request. Unused here; accepted so the
            function matches the report-hook call signature.
        total_size: Expected total number of bytes. None, zero or a negative
            value means the total is unknown, in which case only the byte
            count is shown.
    """
    if total_size is None or total_size <= 0:
        # No usable total: skip the percentage rather than dividing by zero.
        sys.stdout.write("Downloaded %d bytes (total size unknown)\r" % bytes_so_far)
        sys.stdout.flush()
        return
    percent = round(float(bytes_so_far) / total_size * 100, 2)
    sys.stdout.write("Downloaded %d of %d bytes (%0.2f%%)\r" % (bytes_so_far, total_size, percent))
    if bytes_so_far >= total_size:
        sys.stdout.write('\n')
    # The progress line has no trailing newline, so flush explicitly to make
    # it visible on line-buffered streams.
    sys.stdout.flush()

def chunk_download(response, save_file_name, chunk_size=8192, report_hook=None):
    """Stream a response body to a local file in fixed-size chunks.

    Args:
        response: Open response object providing ``info()`` (returning the
            headers) and ``read(n)``.
        save_file_name: Path of the file to write. It is opened in binary
            write mode, so an existing file is overwritten.
        chunk_size: Maximum number of bytes requested per read.
        report_hook: Optional callable invoked after each non-empty chunk as
            ``report_hook(bytes_so_far, chunk_size, total_size)``.
            ``total_size`` is -1 when the response has no usable
            Content-Length header.

    Returns:
        The total number of bytes written to ``save_file_name``.
    """
    headers = response.info()
    # Prefer the mapping-style ``get``; fall back to ``getheader`` for header
    # objects that only offer the older accessor.
    lookup = getattr(headers, 'get', None) or headers.getheader
    try:
        # int() tolerates surrounding whitespace; a missing header (None) or a
        # malformed value is treated as "size unknown".
        total_size = int(lookup('Content-Length'))
    except (TypeError, ValueError):
        total_size = -1

    bytes_so_far = 0
    with open(save_file_name, "wb") as f:
        while True:
            chunk = response.read(chunk_size)
            if not chunk:
                # An empty read marks the end of the body.
                break
            f.write(chunk)
            bytes_so_far += len(chunk)
            if report_hook:
                report_hook(bytes_so_far, chunk_size, total_size)
    return bytes_so_far

def load_data(data_file_address, save_file_name, reload=False):
    """Make sure a local copy of a remote file exists, downloading if needed.

    The download is written to ``save_file_name + '.part'`` and renamed to
    ``save_file_name`` only after it completes. An interrupted download
    therefore never leaves a truncated file that a later run would treat as a
    valid local copy.

    Args:
        data_file_address: URL to download from.
        save_file_name: Local path where the file is stored.
        reload: If True, download again even when ``save_file_name`` exists.

    Returns:
        None.

    Raises:
        Whatever ``urlopen`` or ``chunk_download`` raises. The partial file is
        removed and any existing ``save_file_name`` is left untouched.
    """
    already_present = os.path.exists(save_file_name)
    if already_present and not reload:
        print('{0} was found locally and will be used. To redownload, delete the file and run again or add reload=True to load_data.'.format(save_file_name))
        return

    if already_present:
        # Forced reload: the file does exist, so do not claim it was missing.
        print('{0} exists but reload=True. Downloading from {1}...'.format(save_file_name, data_file_address))
    else:
        print('{0} was not found. Downloading from {1}...'.format(save_file_name, data_file_address))

    partial_file_name = save_file_name + '.part'
    response = urlopen(data_file_address)
    try:
        chunk_download(response, partial_file_name, report_hook=chunk_report)
    except BaseException:
        # Also covers KeyboardInterrupt: never leave a truncated file behind.
        if os.path.exists(partial_file_name):
            os.remove(partial_file_name)
        raise
    finally:
        response.close()

    try:
        os.rename(partial_file_name, save_file_name)
    except OSError:
        # Some platforms refuse to rename onto an existing file; remove the
        # old copy and retry. Any other failure is re-raised unchanged.
        if not os.path.exists(save_file_name):
            raise
        os.remove(save_file_name)
        os.rename(partial_file_name, save_file_name)
    print('Download complete, saved to {0}.'.format(save_file_name))

In [23]:
# Local file name for the downloaded data, and the remote address it is
# fetched from when no local copy exists.
save_file_name = r'all_points_item_bins_width2s_trial3.p'
data_file_address = r'https://www.dropbox.com/sh/41z79caglyodesz/AABMg_R8vdyTE2ADc3J9Nfm-a/all_points_item_bins_width2s_trial3.p?dl=1'

# Set reload=True to force a fresh download even if the file already exists.
load_data(
    data_file_address=data_file_address,
    save_file_name=save_file_name,
    reload=False,
)


all_points_item_bins_width2s_trial3.p was found locally and will be used. To redownload, delete the file and run again or add reload=True to load_data.

In [ ]: