The code below does download the whole data set, but it is a slow and error-prone way to do it. It is easier to download each of the zip files listed below and uncompress them into /captures; a minimal download-and-unpack sketch follows the list.
0000-1000.zip: https://drive.google.com/uc?export=download&id=0B8WcbXogHvegZFFwNU5KWkcwbDQ
1000-2000.zip: https://drive.google.com/uc?export=download&id=0B8WcbXogHvegQU9hal9jUEhDeUk
2000-3000.zip: https://drive.google.com/uc?export=download&id=0B8WcbXogHvegaVhxNTNoWlN2eDQ
3000-4000.zip: https://drive.google.com/uc?export=download&id=0B8WcbXogHvegeUFpenQ5V2M3cEU
4000-5000.zip: https://drive.google.com/uc?export=download&id=0B8WcbXogHvegNnoxZTJnUUs5bTQ
5000-6000.zip: https://drive.google.com/uc?export=download&id=0B8WcbXogHvegMTV4MjRMVXJrcHc
6000-7000.zip: https://drive.google.com/uc?export=download&id=0B8WcbXogHvegeXJrb3J4SHJycnc
7000-8000.zip: https://drive.google.com/uc?export=download&id=0B8WcbXogHvegdl8zNklXeU9oTTQ
8000-9000.zip: https://drive.google.com/uc?export=download&id=0B8WcbXogHvegdlNHZHdCTjV2aUE
9000-9400.zip: https://drive.google.com/uc?export=download&id=0B8WcbXogHvegLXVNUUkzQm5GS0k
noattitude.zip: https://drive.google.com/uc?export=download&id=0B8WcbXogHvegSlN3dWFfUDQta2c
objects.zip: https://drive.google.com/uc?export=download&id=0B8WcbXogHvegcVlrSm8xRUJYSjA
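If you go the manual route but still want to script it, a minimal sketch for fetching and unpacking a single archive into captures/ is shown below. It assumes the Drive link serves the zip file directly; large files can return an HTML confirmation page instead, in which case downloading through a browser is the reliable fallback. The URL and file name are simply the first entry from the list above.
In [ ]:
# Minimal sketch: fetch one archive and unpack it into captures/.
# Assumes the Drive link serves the zip directly; if extraction fails,
# the download may be an HTML confirmation page rather than the archive.
import os
import urllib
import zipfile

archive_url = "https://drive.google.com/uc?export=download&id=0B8WcbXogHvegZFFwNU5KWkcwbDQ"
archive_name = "0000-1000.zip"

if not os.path.exists(archive_name):
    urllib.urlretrieve(archive_url, archive_name)

with zipfile.ZipFile(archive_name) as archive:
    archive.extractall("captures")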
In [ ]:
%matplotlib inline
from __future__ import print_function
import ipywidgets
import os
import re
import sys
import urllib
import zipfile
from IPython.display import display
import outputer
import improc
In [ ]:
# Collect the CSV manifests stored under internal/.
drive_files = []
for root, dirs, files in os.walk('internal'):
    for name in files:
        if name.lower().endswith(".csv"):
            drive_files.append(os.path.join(root, name))
drive_files
In [ ]:
def load_file_map(path, mapping=None):
    # Parse a manifest of "name,drive_id,size" rows and append each row to mapping.
    if mapping is None:
        mapping = []
    with open(path, "r") as folder_data:
        lines = folder_data.readlines()
        for line in lines:
            parts = line.split(",")
            if len(parts) == 3:
                mapping.append(parts)
    return mapping
In [ ]:
all_files = []
for drive_data_path in drive_files:
    load_file_map(drive_data_path, all_files)
print(len(all_files))
In [ ]:
data_path = outputer.setup_directory("captures")

def download(file_info):
    base_url = "https://drive.google.com/uc?export=download&id="
    url = base_url + file_info[1]
    path = os.path.join(data_path, file_info[0])
    size = int(file_info[2])
    # Skip files that already exist locally with the expected size.
    try:
        stats = os.stat(path)
        if stats.st_size == size:
            return
    except (IOError, OSError):
        pass
    filename, headers = urllib.urlretrieve(url, path)
    stats = os.stat(filename)
    if stats.st_size != size:
        print("File", file_info[0], "does not have expected size", file_info[2])
In [ ]:
def download_files(files_info):
    progress_bar = ipywidgets.FloatProgress(min=0, max=len(files_info),
                                            description="Downloading:")
    display(progress_bar)
    for i, entry in enumerate(files_info):
        download(entry)
        progress_bar.value = i
    progress_bar.value = progress_bar.max
    print("Download Complete!")
In [ ]:
download_files(all_files)
In [ ]:
training, test = improc.enumerate_images("captures")
print("Training:", len(training), "Test:", len(test))
print(training[:2])
print(test[:2])
In [ ]:
training_mean = improc.compute_mean_depth(training)
# Expected result: 1680.2417905486018 (Erroneously calculated as 1688.97 previously)
print(training_mean)
In [ ]:
training_standard_deviation = improc.compute_std_dev(training, training_mean)
# Expected result: 884.750172634
print(training_standard_deviation)
print(training_standard_deviation / improc.MAX_DEPTH)
In [ ]:
test_mean = improc.compute_mean_depth(test)
# Expected result: 1676.3290505903665
print(test_mean)
In [ ]:
test_standard_deviation = improc.compute_std_dev(test, test_mean)
# Expected result: 875.721862131
print(test_standard_deviation)
print(test_standard_deviation / improc.MAX_DEPTH)
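These statistics are most plausibly normalization constants for the depth inputs; the sketch below shows one way they might be applied, assuming each loaded frame is a numpy array of raw depth values. The helper name normalize_depth is illustrative and not part of improc.
In [ ]:
import numpy as np

def normalize_depth(depth_frame, mean, std_dev):
    # Hypothetical helper (not part of improc): center a raw depth frame on the
    # training mean and scale it by the training standard deviation.
    return (np.asarray(depth_frame, dtype=np.float32) - mean) / std_dev

# Example: normalized = normalize_depth(frame, training_mean, training_standard_deviation)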