The code below does download the whole data set, but it is a slow and error-prone way to do it. It is easier to download each of the zip files listed below and uncompress them into /captures; a minimal download-and-unpack sketch follows the list.
0000-1000.zip: https://drive.google.com/uc?export=download&id=0B8WcbXogHvegZFFwNU5KWkcwbDQ
1000-2000.zip: https://drive.google.com/uc?export=download&id=0B8WcbXogHvegQU9hal9jUEhDeUk
2000-3000.zip: https://drive.google.com/uc?export=download&id=0B8WcbXogHvegaVhxNTNoWlN2eDQ
3000-4000.zip: https://drive.google.com/uc?export=download&id=0B8WcbXogHvegeUFpenQ5V2M3cEU
4000-5000.zip: https://drive.google.com/uc?export=download&id=0B8WcbXogHvegNnoxZTJnUUs5bTQ
5000-6000.zip: https://drive.google.com/uc?export=download&id=0B8WcbXogHvegMTV4MjRMVXJrcHc
6000-7000.zip: https://drive.google.com/uc?export=download&id=0B8WcbXogHvegeXJrb3J4SHJycnc
7000-8000.zip: https://drive.google.com/uc?export=download&id=0B8WcbXogHvegdl8zNklXeU9oTTQ
8000-9000.zip: https://drive.google.com/uc?export=download&id=0B8WcbXogHvegdlNHZHdCTjV2aUE
9000-9400.zip: https://drive.google.com/uc?export=download&id=0B8WcbXogHvegLXVNUUkzQm5GS0k
noattitude.zip: https://drive.google.com/uc?export=download&id=0B8WcbXogHvegSlN3dWFfUDQta2c
objects.zip: https://drive.google.com/uc?export=download&id=0B8WcbXogHvegcVlrSm8xRUJYSjA
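If you go the manual route but still want to script it, a minimal sketch for fetching and unpacking a single archive into captures/ is shown below. It assumes the Drive link serves the zip file directly; large files can return an HTML confirmation page instead, in which case downloading through a browser is the reliable fallback. The URL and file name are simply the first entry from the list above.
In [ ]:
# Minimal sketch: fetch one archive and unpack it into captures/.
# Assumes the Drive link serves the zip directly; if extraction fails,
# the download may be an HTML confirmation page rather than the archive.
import os
import urllib
import zipfile

archive_url = "https://drive.google.com/uc?export=download&id=0B8WcbXogHvegZFFwNU5KWkcwbDQ"
archive_name = "0000-1000.zip"

if not os.path.exists(archive_name):
    urllib.urlretrieve(archive_url, archive_name)

with zipfile.ZipFile(archive_name) as archive:
    archive.extractall("captures")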
In [ ]:
%matplotlib inline
from __future__ import print_function
import ipywidgets
import os
import re
import sys
import urllib
import zipfile
from IPython.display import display
import outputer
import improc
In [ ]:
# Collect the CSV manifests stored under internal/.
drive_files = []
for root, dirs, files in os.walk('internal'):
    for name in files:
        if name.lower().endswith(".csv"):
            drive_files.append(os.path.join(root, name))
drive_files
In [ ]:
def load_file_map(path, mapping=None):
    # Parse a manifest of "name,drive_id,size" rows and append each row to mapping.
    if mapping is None:
        mapping = []
    with open(path, "r") as folder_data:
        lines = folder_data.readlines()
        for line in lines:
            parts = line.split(",")
            if len(parts) == 3:
                mapping.append(parts)
    return mapping
In [ ]:
all_files = []
for drive_data_path in drive_files:
    load_file_map(drive_data_path, all_files)
print(len(all_files))
In [ ]:
data_path = outputer.setup_directory("captures")

def download(file_info):
    base_url = "https://drive.google.com/uc?export=download&id="
    url = base_url + file_info[1]
    path = os.path.join(data_path, file_info[0])
    size = int(file_info[2])
    # Skip files that already exist locally with the expected size.
    try:
        stats = os.stat(path)
        if stats.st_size == size:
            return
    except (IOError, OSError):
        pass
    filename, headers = urllib.urlretrieve(url, path)
    stats = os.stat(filename)
    if stats.st_size != size:
        print("File", file_info[0], "does not have expected size", file_info[2])
In [ ]:
def download_files(files_info):
    progress_bar = ipywidgets.FloatProgress(min=0, max=len(files_info),
                                            description="Downloading:")
    display(progress_bar)
    for i, entry in enumerate(files_info):
        download(entry)
        progress_bar.value = i
    progress_bar.value = progress_bar.max
    print("Download Complete!")
In [ ]:
download_files(all_files)
In [ ]:
training, test = improc.enumerate_images("captures")
print("Training:", len(training), "Test:", len(test))
print(training[:2])
print(test[:2])
In [ ]:
training_mean = improc.compute_mean_depth(training)
# Expected result: 1680.2417905486018 (Erroneously calculated as 1688.97 previously)
print(training_mean)
In [ ]:
training_standard_deviation = improc.compute_std_dev(training, training_mean)
# Expected result: 884.750172634
print(training_standard_deviation)
print(training_standard_deviation / improc.MAX_DEPTH)
In [ ]:
test_mean = improc.compute_mean_depth(test)
# Expected result: 1676.3290505903665
print(test_mean)
In [ ]:
test_standard_deviation = improc.compute_std_dev(test, test_mean)
# Expected result: 875.721862131
print(test_standard_deviation)
print(test_standard_deviation / improc.MAX_DEPTH)
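These statistics are most plausibly normalization constants for the depth inputs; the sketch below shows one way they might be applied, assuming each loaded frame is a numpy array of raw depth values. The helper name normalize_depth is illustrative and not part of improc.
In [ ]:
import numpy as np

def normalize_depth(depth_frame, mean, std_dev):
    # Hypothetical helper (not part of improc): center a raw depth frame on the
    # training mean and scale it by the training standard deviation.
    return (np.asarray(depth_frame, dtype=np.float32) - mean) / std_dev

# Example: normalized = normalize_depth(frame, training_mean, training_standard_deviation)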