Copyright 2020 Google Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
In [1]:
import numpy as np
import json
import matplotlib.pyplot as plt
from tqdm import tqdm
import random
import subprocess
import time
import os
In [3]:
# Load the list of packages to benchmark: every non-blank line of
# packages_npm.txt becomes one entry.
with open("packages_npm.txt") as package_file:
    # Strip each line individually (this also removes a stray "\r") and drop
    # blank lines, so an empty file or an empty line never produces an
    # empty-string "package" entry.
    packages = [name for name in (line.strip() for line in package_file) if name]
In [10]:
def get_seconds(time):
    """Convert a ``<minutes>m<seconds>s`` duration string to seconds.

    Args:
        time: Duration text such as ``"1m2.500s"``. Surrounding whitespace is
            ignored, and a comma is accepted as the decimal separator
            (e.g. ``"0m0,250s"``).

    Returns:
        The duration in seconds as a float.

    Raises:
        ValueError: If the text has no ``m`` separator, does not end in ``s``,
            or its numeric parts cannot be parsed.
    """
    text = time.strip()
    minutes, separator, rest = text.partition('m')
    # Without these checks a string such as "12s" would be sliced at the wrong
    # positions and silently yield a wrong number instead of an error.
    if not separator or not rest.endswith('s'):
        raise ValueError("expected '<minutes>m<seconds>s', got {!r}".format(time))
    seconds = float(rest[:-1].replace(',', '.'))
    return int(minutes) * 60 + seconds
def log(file, msg):
    """Append ``msg`` followed by a newline to the file at path ``file``.

    The file is created if it does not exist. It is opened and closed on
    every call; the context manager guarantees the handle is released even
    if the write raises.

    Args:
        file: Path of the log file.
        msg: Text to append (a trailing newline is added).
    """
    with open(file, 'a') as handle:
        handle.write(msg + '\n')
In [11]:
# Per-package benchmark results. Every entry of the rates/times/speed lists is
# itself a list holding one value per compression level tried for that package.
rates_gzip = []
rates_brotli = []
times_gzip = []
times_brotli = []
speed_gzip = []
speed_brotli = []
# Size in bytes of each package's concatenated, uncompressed bundle.
init_sizes = []
# Script paths found for each package, in the same order as `packages`.
all_urls = []


def _read_cpu_seconds(time_file):
    """Return the sum of the 2nd and 3rd timing entries stored in `time_file`.

    Every line is expected to be "<label><TAB><duration>"; the first line is
    skipped. Presumably the file holds real/user/sys timings, which would make
    the result user + sys CPU time -- confirm against the compression scripts.
    """
    with open(time_file) as handle:
        user_sys = handle.read().strip().split('\n')[1:]
    return (get_seconds(user_sys[0].split('\t')[1])
            + get_seconds(user_sys[1].split('\t')[1]))


def _benchmark_levels(script, levels, compressed_path, raw_size):
    """Run `script` once per level on example2.txt and collect the metrics.

    Returns three lists with one entry per level: compression rate
    (raw size / compressed size), measured seconds, and speed (raw bytes per
    second).
    """
    rates, times, speeds = [], [], []
    for level in levels:
        subprocess.run(["bash", script, str(level), "time2.txt",
                        compressed_path, "example2.txt"])
        cpu_seconds = _read_cpu_seconds("time2.txt")
        compressed_size = os.stat(compressed_path).st_size
        rates.append(raw_size / compressed_size)
        times.append(cpu_seconds)
        # A measured time of zero would raise ZeroDivisionError and abort the
        # whole run; record NaN instead so the sample can be ignored when
        # averaging (np.nanmean).
        speeds.append(raw_size / cpu_seconds if cpu_seconds > 0 else float("nan"))
    return rates, times, speeds


for i, package in enumerate(packages):
    with open("package.txt", "w") as package_file:
        package_file.write(package)
    # delete the current node_modules directory containing the previous package
    subprocess.run(["rm", "-rf", "node_modules"])
    # install the package and save the names of js scripts
    subprocess.run(["bash", "npm_install_packages.sh"])
    subprocess.run(["bash", "find_urls_save.sh"])
    with open("urls_for_package.txt") as urls_file:
        urls = urls_file.read().strip().split('\n')
    all_urls.append(urls)

    # concatenate all scripts of that package together to simulate web bundle
    script_parts = []
    for url in urls:
        if url == "":
            continue
        if not os.path.exists(url):
            print(i)
            print("DOESN'T EXIST: ", url)
            continue
        with open(url) as script_file:
            script_parts.append(script_file.read())
    script_concatenated = "".join(script_parts)

    with open("example2.txt", "w") as bundle_file:
        bundle_file.write(script_concatenated)
    size_non_compressed = os.stat("example2.txt").st_size
    init_sizes.append(size_non_compressed)

    # do the gzip compression with different levels
    gzip_rates, gzip_times, gzip_speeds = _benchmark_levels(
        "gzip_compress.sh", range(4, 10), "example_gzip2.txt.gz",
        size_non_compressed)
    # do the brotli compression with different levels
    brotli_rates, brotli_times, brotli_speeds = _benchmark_levels(
        "brotli_compress.sh", range(4, 12), "example_brotli2.txt.br",
        size_non_compressed)

    rates_gzip.append(gzip_rates)
    rates_brotli.append(brotli_rates)
    times_gzip.append(gzip_times)
    times_brotli.append(brotli_times)
    speed_gzip.append(gzip_speeds)
    speed_brotli.append(brotli_speeds)

    # Every 100 packages, append the running per-level averages to the log.
    if i != 0 and i % 100 == 0:
        log("logs3.txt", "rates_gzip: " + str(np.mean(rates_gzip, axis=0)))
        log("logs3.txt", "rates_brotli: " + str(np.mean(rates_brotli, axis=0)))
        log("logs3.txt", "times_gzip: " + str(np.mean(times_gzip, axis=0)))
        log("logs3.txt", "times_brotli: " + str(np.mean(times_brotli, axis=0)))
        # nanmean: speed samples with a zero measured time are stored as NaN.
        log("logs3.txt", "speed_gzip: " + str(np.nanmean(speed_gzip, axis=0)))
        log("logs3.txt", "speed_brotli: " + str(np.nanmean(speed_brotli, axis=0)))
In [13]:
import pandas as pd

# Summary table: one row per (algorithm, level), averaged over all packages.
# Compute each per-level mean once and reuse it for the derived columns.
mean_rates = np.hstack((np.mean(rates_gzip, axis=0), np.mean(rates_brotli, axis=0)))
# nanmean ignores NaN samples (an undefined speed measurement) instead of
# letting a single one blank the whole column; without NaNs it equals np.mean.
mean_speed = np.hstack((np.nanmean(speed_gzip, axis=0), np.nanmean(speed_brotli, axis=0)))

frame = pd.DataFrame()
frame["name"] = ["gzip 4", "gzip 5", "gzip 6", "gzip 7", "gzip 8", "gzip 9",
                 "brotli 4", "brotli 5", "brotli 6", "brotli 7", "brotli 8", "brotli 9", "brotli 10", "brotli 11"]
frame["rates"] = mean_rates
# Fraction of bytes saved at the mean compression rate: 1 - 1/rate.
frame["savings"] = 1 - 1 / mean_rates
# Speeds are collected in bytes per second; divide by 1e6 to report MB/s.
frame["speed(MB/s)"] = mean_speed / 1000000
frame
Out[13]:
In [25]:
# Report the smallest and largest uncompressed bundle sizes, converted from
# bytes to MB (bytes / 1e6).
smallest_mb = np.min(init_sizes) / 1000000
largest_mb = np.max(init_sizes) / 1000000
print("non compressed size range {}MB-{}MB".format(smallest_mb, largest_mb))