In [233]:
import os
import random
import shutil
import string
import tempfile
from versioning_fs import VersioningFS, VersionManager
# Size multipliers expressed as powers of 1024.
KB = 1024
MB = KB ** 2
GB = KB ** 3

# Logical folder name -> directory name relative to a base directory.
PATHS = {
    'BACKUPS': 'backups',
    'USER_FILES': 'userfiles',
    'TEMP': 'tmp',
}
class Paths(dict):
    """Attribute-style access to the entries of ``PATHS`` under a base directory.

    ``paths.NAME`` evaluates to ``os.path.join(base_dir, PATHS['NAME'])``.
    The class derives from ``dict`` but never stores anything in the dict
    itself; entries are reachable through attribute access, ``iterkeys()``
    and ``itervalues()`` only.
    """

    def __init__(self, base_dir):
        """Remember ``base_dir``, the directory every entry is joined onto."""
        self.__base_dir = base_dir
        self.__paths = PATHS

    def __getattr__(self, attr):
        """Resolve ``attr`` to its path joined onto the base directory.

        Raises:
            AttributeError: if ``attr`` is not a known entry. The attribute
                protocol requires this exception type so that ``hasattr`` and
                ``getattr(obj, name, default)`` behave correctly.
        """
        # __getattr__ only runs after normal lookup has failed, so being asked
        # for our own private storage means __init__ has not run yet. Looking
        # it up again through self would recurse without bound.
        if attr.startswith('_Paths__'):
            raise AttributeError(attr)
        try:
            relative = self.__paths[attr]
        except KeyError:
            raise AttributeError(attr)
        return os.path.join(self.__base_dir, relative)

    def iterkeys(self):
        """Return an iterator over the logical entry names."""
        # iter(mapping) works on both Python 2 and Python 3 dicts.
        return iter(self.__paths)

    def itervalues(self):
        """Return a list with the resolved path of every entry."""
        return [self.__getattr__(name) for name in self.iterkeys()]

    def __repr__(self):
        """Show the name -> resolved path mapping."""
        return repr({name: self.__getattr__(name) for name in self.iterkeys()})
class DictVersionManager(VersionManager):
    """``VersionManager`` that keeps its path -> version records in a dict."""

    def __init__(self):
        super(DictVersionManager, self).__init__()
        # Maps a path to the version last stored through set_version().
        self.__files = {}

    def has_snapshot(self, path):
        """Return True when a version other than None is recorded for ``path``."""
        return self.__files.get(path) is not None

    def version(self, path):
        """Return the version recorded for ``path``, or None if there is none."""
        return self.__files.get(path)

    def set_version(self, path, version):
        """Record ``version`` for ``path``, replacing any earlier value."""
        self.__files[path] = version

    def remove(self, path):
        """Drop the record for ``path``; paths without a record are ignored."""
        self.__files.pop(path, None)
def get_dir_size(path):
    """Return the summed size of every file found beneath ``path``.

    The tree is traversed with ``os.walk`` and each file is measured with
    ``os.path.getsize``; when the walk yields no files the result is 0.
    """
    return sum(
        os.path.getsize(os.path.join(folder, filename))
        for folder, _subdirs, filenames in os.walk(path)
        for filename in filenames
    )
def random_filename(size=20):
    """Build a name of ``size`` characters picked from A-Z and 0-9.

    Characters come from the module-level ``random`` generator, so the result
    is only repeatable when the caller seeds ``random`` first.
    """
    alphabet = string.ascii_uppercase + string.digits
    picked = [random.choice(alphabet) for _ in range(size)]
    return ''.join(picked)
class BaseTestFS(object):
    """Filesystem under test, rooted in a freshly created temporary directory.

    Construction creates a temporary directory, makes every folder listed by
    ``Paths`` inside it, and builds the filesystem object on top of those
    folders. The temporary directory is removed when the instance is
    finalised.
    """

    def __init__(self, fs):
        """Create the directory layout and instantiate the filesystem.

        Args:
            fs: Callable invoked as ``fs(manager, user_files, backups, temp)``
                to build the filesystem. A non-callable value falls back to
                ``VersioningFS``, which is what was always used before this
                argument was honoured.
        """
        # Keep the root so __del__ can remove the whole tree, not only the
        # sub-folders created below.
        self._temp_dir = tempfile.mkdtemp()
        self.paths = Paths(self._temp_dir)
        for folder in self.paths.itervalues():
            if not os.path.exists(folder):
                os.makedirs(folder)
        manager = DictVersionManager()
        fs_factory = fs if callable(fs) else VersioningFS
        self.fs = fs_factory(manager, self.paths.USER_FILES,
                             self.paths.BACKUPS, self.paths.TEMP)

    def __del__(self):
        """Remove the temporary directory tree created by ``__init__``."""
        # __init__ may have failed before the attribute was assigned.
        temp_dir = getattr(self, '_temp_dir', None)
        if temp_dir is not None and os.path.exists(temp_dir):
            shutil.rmtree(temp_dir)
class TestFS(BaseTestFS):
    """``BaseTestFS`` extended with disk-usage read-outs for its folders."""

    def __init__(self, *args, **kwargs):
        super(TestFS, self).__init__(*args, **kwargs)

    @property
    def total_usage(self):
        """Backups usage plus user-files usage."""
        return self.backups_usage + self.user_usage

    @property
    def backups_usage(self):
        """Result of ``get_dir_size`` for the BACKUPS folder."""
        return get_dir_size(self.paths.BACKUPS)

    @property
    def user_usage(self):
        """Result of ``get_dir_size`` for the USER_FILES folder."""
        return get_dir_size(self.paths.USER_FILES)
# Module-level filesystem instance; run_modifier() and run() read and write
# files through disk_usage.fs and locate them via disk_usage.paths.
disk_usage = TestFS(fs=VersioningFS)
In [258]:
from random import randint
from StringIO import StringIO
def random_create(original_contents, amount=1,
                  lorem_path="benchmarking/loremipsum.txt"):
    """Write the lorem text into ``original_contents`` at random offsets.

    Each round picks an offset in ``[0, len(contents)]`` and writes the lorem
    text there. The write overwrites whatever follows the offset (as a file
    write would) and extends the contents when the text runs past the end; it
    does not insert.

    Args:
        original_contents: Starting contents (a byte string).
        amount: Number of rounds to apply.
        lorem_path: File whose bytes are written on every round.

    Returns:
        The modified contents.
    """
    # Read once and close the handle deterministically.
    with open(lorem_path, 'rb') as lorem_file:
        lorem = lorem_file.read()

    contents = original_contents
    for _ in range(amount):
        offset = randint(0, len(contents))
        # Overwrite in place: keep the head, drop len(lorem) bytes, keep the rest.
        contents = contents[:offset] + lorem + contents[offset + len(lorem):]
    return contents
def random_delete(original_contents, amount=1):
    """Cut ``original_contents`` down to a random slice, ``amount`` times over.

    Each round draws ``start`` from ``[0, len]`` and ``end`` from
    ``[start, len]`` and continues with ``contents[start:end]``.

    NOTE(review): despite the name, every round keeps the chosen slice and
    discards everything outside it - confirm that this is the intended effect.
    """
    remaining = original_contents
    for _ in range(amount):
        length = len(remaining)
        start = randint(0, length)
        end = randint(start, length)
        remaining = remaining[start:end]
    return remaining
def random_append(original_contents, amount=1,
                  lorem_path="benchmarking/loremipsum.txt"):
    """Append the lorem text to ``original_contents`` ``amount`` times.

    Args:
        original_contents: Starting contents (a byte string).
        amount: Number of copies of the lorem text to append.
        lorem_path: File whose bytes are appended.

    Returns:
        ``original_contents`` followed by ``amount`` copies of the lorem text.
    """
    with open(lorem_path, 'rb') as lorem_file:
        lorem = lorem_file.read()
    # The previous buffer-based loop measured the length from the current
    # stream position, so from the second round on it wrote at the wrong
    # offset and clobbered existing data instead of appending.
    return original_contents + lorem * amount
def run_modifier(filename, creates, deletes, appends):
    """Return the contents of ``filename`` after a batch of random edits.

    The file is read through ``disk_usage.fs`` and then passed through
    ``creates`` calls to ``random_create``, ``deletes`` calls to
    ``random_delete`` and ``appends`` calls to ``random_append``, in that
    order. Nothing is written back; the caller stores the result.

    Args:
        filename: Name of the file to read, as understood by ``disk_usage.fs``.
        creates: Number of ``random_create`` rounds.
        deletes: Number of ``random_delete`` rounds.
        appends: Number of ``random_append`` rounds.

    Returns:
        The modified contents.
    """
    source = disk_usage.fs.open(filename, 'rb')
    try:
        results = source.read()
    finally:
        source.close()

    # The contents just read are the starting point. They used to be replaced
    # by an empty string here, so every call rebuilt the file from scratch.
    for _ in range(creates):
        results = random_create(results)
    for _ in range(deletes):
        results = random_delete(results)
    for _ in range(appends):
        results = random_append(results)
    return results
import time
def run(creates, deletes, appends, files=1, iterations=1, pause=1):
    """Repeatedly rewrite randomly named files through ``disk_usage.fs``.

    For every iteration and every file: create the file empty if it does not
    exist yet, compute new contents with ``run_modifier`` and write them back.
    The size of the first file is printed after each of its writes.

    Args:
        creates: ``random_create`` rounds per rewrite.
        deletes: ``random_delete`` rounds per rewrite.
        appends: ``random_append`` rounds per rewrite.
        files: Number of distinct files to generate.
        iterations: Number of rewrite passes over all files.
        pause: Seconds to sleep after each write.
    """
    names = [random_filename() for _ in range(files)]
    for iteration in range(iterations):
        for name in names:
            path = os.path.join(disk_usage.paths.USER_FILES, name)
            if not os.path.exists(path):
                handle = disk_usage.fs.open(name, 'wb')
                try:
                    handle.write(b'')
                finally:
                    handle.close()
                # presumably spaces writes out so the versioning backend sees
                # distinct timestamps - TODO confirm
                time.sleep(pause)

            # Honour the caller's counts; they used to be overridden by the
            # hard-coded values 6 / 0 / 96.
            new_contents = run_modifier(name, creates=creates,
                                        deletes=deletes, appends=appends)
            handle = disk_usage.fs.open(name, 'wb')
            try:
                handle.write(new_contents)
            finally:
                handle.close()

            if name == names[0]:
                # Same text as the former `print name, z, size` statement,
                # written so that it also parses on Python 3.
                print("%s %s %s" % (name, iteration, os.path.getsize(path)))
            time.sleep(pause)
# Run the benchmark over 10 files for 10 iterations. run() sleeps after every
# write, so this cell takes a while to finish.
run(creates=6, deletes=4, appends=96, files=10, iterations=10)
print("Test completed.")
In [236]:
import matplotlib.pyplot as plt
import numpy as np
# Both curves are synthetic power laws (z**2.7 and z**3) over 0..test_size-1;
# nothing here reads the measurements produced by the benchmark.
test_size = 100
x = list(range(test_size))
y1 = [z ** 2.7 for z in x]
y2 = [z ** 3 for z in x]

fig, ax = plt.subplots(figsize=(12, 6))
# To clamp the y-axis, enable: ax.set_ylim(0, 10000)
ax.plot(x, y1, '-b', label='Rsync Filesystem', linewidth=2)
ax.plot(x, y2, '-r', label='Backup Filesystem', linewidth=2)
ax.legend(loc='upper right')
ax.set_title('Filesystem comparison', fontsize=24)
ax.set_xlabel('Number of Changes', fontsize=12)
ax.set_ylabel('Space Used (MB)', fontsize=12)
plt.show()
In [71]:
In [ ]: