Data Engineering 101


In [2]:
import sys
import os
import timeit

from tqdm import tqdm
import boto3

import pandas as pd
import io

In [3]:
def hook(t):
    """Build a progress callback bound to the tracker ``t``.

    Parameters
    ----------
    t : object
        Any object exposing an ``update(n)`` method. Presumably a ``tqdm``
        progress bar, since ``tqdm`` is imported in this notebook -- confirm
        against the caller.

    Returns
    -------
    callable
        A one-argument function that forwards the reported byte count to
        ``t.update`` and returns ``None``.
    """
    def _advance(bytes_amount):
        """Forward ``bytes_amount`` to the bound tracker's ``update``."""
        # ``update`` is looked up on every call, so the tracker is only
        # touched when the callback actually fires.
        t.update(bytes_amount)

    return _advance

import math

def convert_size_str(size_bytes):
    """Format a byte count as a human-readable string using 1024-based units.

    Parameters
    ----------
    size_bytes : int or float
        Number of bytes. Must be non-negative.

    Returns
    -------
    str
        ``"0B"`` for zero; otherwise ``"<value> <unit>"`` where ``<value>`` is
        the size scaled to the chosen unit and rounded to two decimals
        (e.g. ``"1.5 KB"``). Values below one byte are reported in ``B`` and
        values beyond the largest unit are reported in ``YB``.

    Raises
    ------
    ValueError
        If ``size_bytes`` is negative.
    """
    if size_bytes == 0:
        return "0B"
    if size_bytes < 0:
        raise ValueError("size_bytes must be non-negative, got %r" % (size_bytes,))

    size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
    last = len(size_name) - 1

    # Pick the unit with exact comparisons against powers of 1024 rather than
    # floor(log(x, 1024)): the floating-point log can land just off an exact
    # power, and it yields a negative index for values below 1.
    i = 0
    threshold = 1024.0
    while i < last and size_bytes >= threshold:
        threshold *= 1024.0
        i += 1

    s = round(size_bytes / (1024.0 ** i), 2)

    # Rounding can push the scaled value up to 1024 (e.g. 1024**2 - 1 bytes
    # would read "1024.0 KB"); promote to the next unit when one exists.
    if s >= 1024 and i < last:
        i += 1
        s = round(size_bytes / (1024.0 ** i), 2)

    return "%s %s" % (s, size_name[i])

def convert_size_int(size_bytes):
    """Return the numeric part of a byte count scaled to its 1024-based unit.

    The value is ``size_bytes`` divided by the largest power of 1024 that
    does not exceed it (capped at 1024**8), rounded to two decimals. The unit
    itself is not returned.

    Parameters
    ----------
    size_bytes : int or float
        Number of bytes. Must be non-negative.

    Returns
    -------
    float
        The scaled value; ``0.0`` for a zero input. Values below one byte are
        returned unscaled (rounded to two decimals).

    Raises
    ------
    ValueError
        If ``size_bytes`` is negative.
    """
    if size_bytes == 0:
        # Numeric zero, so the return type matches every other input.
        return 0.0
    if size_bytes < 0:
        raise ValueError("size_bytes must be non-negative, got %r" % (size_bytes,))

    # Largest supported exponent (1024**8); the cap also bounds the loop below.
    max_exponent = 8

    # Exact comparisons against powers of 1024 avoid the floating-point error
    # of floor(log(x, 1024)) and the negative exponent it gives below 1.
    exponent = 0
    threshold = 1024.0
    while exponent < max_exponent and size_bytes >= threshold:
        threshold *= 1024.0
        exponent += 1

    scaled = round(size_bytes / (1024.0 ** exponent), 2)

    # If rounding pushed the value up to 1024, express it in the next unit.
    if scaled >= 1024 and exponent < max_exponent:
        scaled = round(size_bytes / (1024.0 ** (exponent + 1)), 2)

    return scaled

In [4]:
# download files

In [ ]:


In [ ]: