In [17]:
# -*- coding=utf-8 -*-                                                          
import os                                                                       
import tarfile                                                                  
from six.moves import urllib                                                    
                                                                                
"""                                                                             
Purpose:                                                                        
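    Download the housing dataset archive (housing.tgz) from the handson-ml
    GitHub repository and extract it into a local directory.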
"""                                                                             
                                                                                
__author__ = "Yue-Wen FANG"                                                     
__copyright__ = "Copyright 2018"                                                
__version__ = "0.0.1"                                                           
__maintainer__ = "Yue-Wen FANG"                                                 
__email__ = "fyuewen@gmail.com"                                                 
__status__ = "development"                                                      
__date__ = "June 1, 2018"                                                       
                                                                                
# Remote root of the repository files. The raw-content host is used here; it
# replaced the earlier "https://github.com/ageron/handson-ml/tree/master/" root.
DOWNLOAD_ROOT = "https://raw.githubusercontent.com/ageron/handson-ml/master/"

# Local directory (relative to the working directory) for the dataset.
HOUSING_PATH = os.path.join("datasets", "housing")

# Full URL of the archive, built from the root plus the in-repo path.
HOUSING_URL = "".join((DOWNLOAD_ROOT, "datasets/housing/housing.tgz"))
                                                                                
                                                                                
def fetch_housing_data(housing_url=HOUSING_URL, housing_path=HOUSING_PATH):
    """Download the housing archive and unpack it into ``housing_path``.

    The archive is saved as ``housing.tgz`` inside ``housing_path`` and then
    extracted into that same directory. The local archive path is printed
    before the download starts.

    Args:
        housing_url: URL of the gzipped tar archive to download.
        housing_path: Directory that receives the downloaded archive and its
            extracted contents; it is created if it does not exist yet.

    Returns:
        None.

    Raises:
        Errors from ``os.makedirs``, ``urlretrieve`` or ``tarfile`` are not
        caught here and propagate to the caller.
    """
    if not os.path.isdir(housing_path):
        os.makedirs(housing_path)
    tgz_path = os.path.join(housing_path, "housing.tgz")
    print(tgz_path)
    urllib.request.urlretrieve(housing_url, tgz_path)
    # The context manager closes the archive even if extraction raises,
    # which the previous explicit close() call did not guarantee.
    with tarfile.open(tgz_path) as housing_tgz:
        if hasattr(tarfile, "data_filter"):
            # The archive comes from the network: where the interpreter
            # supports extraction filters, refuse members that would escape
            # housing_path (absolute paths, "..", unsafe links).
            housing_tgz.extractall(path=housing_path, filter="data")
        else:
            # Older interpreters have no filter support; plain extraction.
            housing_tgz.extractall(path=housing_path)

                                                                                
                                                                                
# Run the download and extraction with the default URL and target directory.
fetch_housing_data()


datasets/housing/housing.tgz

In [ ]: