In [233]:
import os
import random
import shutil
import string
import tempfile

from versioning_fs import VersioningFS, VersionManager

KB = 1024
MB = pow(1024, 2)
GB = pow(1024, 3)


PATHS = {'BACKUPS': 'backups',
         'USER_FILES': 'userfiles',
         'TEMP': 'tmp'}

class Paths(dict):
    def __init__(self, base_dir):
        self.__base_dir = base_dir
        self.__paths = PATHS

    def __getattr__(self, attr):
        path = self.__paths[attr]
        abs_path = os.path.join(self.__base_dir, path)
        return abs_path

    def iterkeys(self):
        return self.__paths.iterkeys()

    def itervalues(self):
        return [self.__getattr__(n) for n in self.__paths.iterkeys()]

    def __repr__(self):
        return repr({n: self.__getattr__(n) for n in self.iterkeys()})


class DictVersionManager(VersionManager):
    def __init__(self):
        super(DictVersionManager, self).__init__()
        self.__files = {}

    def has_snapshot(self, path):
        if self.__files.get(path) is not None:
            return True
        return False

    def version(self, path):
        version = self.__files.get(path)
        return version

    def set_version(self, path, version):
        self.__files[path] = version

    def remove(self, path):
        if path in self.__files:
            self.__files.pop(path, None)
            
def get_dir_size(path):
    bytes = 0
    for root, dirs, filenames in os.walk(path):
        for name in filenames:
            abs_path = os.path.join(root, name)
            bytes += os.path.getsize(abs_path)
    return bytes


def random_filename(size=20):
    chars = string.ascii_uppercase + string.digits
    return ''.join(random.choice(chars) for _ in range(size))


class BaseTestFS(object):
    def __init__(self, fs):
        temp_dir = tempfile.mkdtemp()
        self.paths = Paths(temp_dir)
        for folder in self.paths.itervalues():
            if not os.path.exists(folder):
                os.makedirs(folder)

        manager = DictVersionManager()        
        self.fs = VersioningFS(manager, self.paths.USER_FILES,
                               self.paths.BACKUPS, self.paths.TEMP)

    def __del__(self):
        for folder in self.paths.itervalues():
            if os.path.exists(folder):
                shutil.rmtree(folder)
        

class TestFS(BaseTestFS):
    def __init__(self, *args, **kwargs):
        super(TestFS, self).__init__(*args, **kwargs)

    @property
    def total_usage(self):
        all_counters = [self.backups_usage, self.user_usage]
        bytes = 0
        for counter in all_counters:
            bytes += counter
            
        return bytes
    
    @property
    def backups_usage(self):
        return get_dir_size(self.paths.BACKUPS)
    
    @property
    def user_usage(self):
        return get_dir_size(self.paths.USER_FILES)
        
        
disk_usage = TestFS(fs=VersioningFS)

In [258]:
from random import randint
from StringIO import StringIO

def random_create(original_contents, amount=1):
    contents = StringIO(original_contents)
    lorem = open("benchmarking/loremipsum.txt", 'rb').read()
    
    for _ in range(amount):
        contents.seek(0)
        seek_to = randint(0, len(contents.read()))
        contents.seek(seek_to)
        contents.write(lorem)
        contents.seek(0)
    
    return contents.read()


def random_delete(original_contents, amount=1):
    contents = StringIO(original_contents)
    
    for _ in range(amount):
        start = randint(0, len(contents.read()))
        contents.seek(0)
        end = randint(start, len(contents.read()))
        contents.seek(0)
        new_contents = contents.read()[start:end]
        contents = StringIO(new_contents)
    
    return contents.read()


def random_append(original_contents, amount=1):
    contents = StringIO(original_contents)
    lorem = open("benchmarking/loremipsum.txt", 'rb').read()
    
    for _ in range(amount):
        seek_to = len(contents.read())
        contents.seek(seek_to)
        contents.write(lorem)
        contents.seek(0)
    
    return contents.read()


def run_modifier(filename, creates, deletes, appends):
    
    f = disk_usage.fs.open(filename, 'rb')
    results = f.read()
    f.close()

    results = ""
    for _ in range(creates):
        results = random_create(results)
    for _ in range(deletes):
        results = random_delete(results)
    for _ in range(appends):
        results = random_append(results)

    return results


import time

def run(creates, deletes, appends, files=1, iterations=1):
    x = [random_filename() for x in range(files)]
    for z in range(iterations):
        for name in x:
            path = os.path.join(disk_usage.paths.USER_FILES, name)
            if not os.path.exists(path):
                f = disk_usage.fs.open(name, 'wb')
                f.write('')
                f.close()
                time.sleep(1)
    
            new_contents = run_modifier(name, creates=6, deletes=0, appends=96)
            f = disk_usage.fs.open(name, 'wb')
            f.write(new_contents)
            f.close()
            if name == x[0]:
                print name, z, os.path.getsize(path)
        time.sleep(1)
            

run(creates=6, deletes=4, appends=96, files=10, iterations=10)
print("Test completed.")


9R3MBCO076G7IFXBLQM4 0 359597
9R3MBCO076G7IFXBLQM4 1 357666
9R3MBCO076G7IFXBLQM4 2 354799
9R3MBCO076G7IFXBLQM4 3 352962
9R3MBCO076G7IFXBLQM4 4 356031
9R3MBCO076G7IFXBLQM4 5 358194
9R3MBCO076G7IFXBLQM4 6 354219
9R3MBCO076G7IFXBLQM4 7 352275
9R3MBCO076G7IFXBLQM4 8 358550
9R3MBCO076G7IFXBLQM4 9 357503
Test completed.

In [236]:
import matplotlib.pyplot as plt
import numpy as np

fig, ax = plt.subplots(figsize=(12,6))

test_size = 100

x = [z for z in range(test_size)]
y1 = [pow(z, 2.7) for z in range(test_size)]
y2 = [pow(z, 3) for z in range(test_size)]

#plt.ylim(0, 10000)

ax.plot(x, y1, '-b', label='Rsync Filesystem', linewidth=2)
ax.plot(x, y2, '-r', label='Backup Filesystem', linewidth=2)

ax.legend(loc='upper right')
ax.set_title('Filesystem comparison', fontsize=24)

plt.xlabel('Number of Changes', fontsize=12)
plt.ylabel('Space Used (MB)', fontsize= 12)
plt.show()



In [71]:


In [ ]: