In [1]:
def pattern_complexity(pattern):
scalar_complexity = 0.0
nodes_in = pattern.shape[1]
for i in xrange(nodes_in - 1):
for j in xrange(i + 1, nodes_in):
scalar_complexity += numpy.inner(pattern[:, i], pattern[:, j])
# normalise by the number of vectors products, i.e.,
# nodes_in * (nodes_in - 1) / 2
norm = float(nodes_in * (nodes_in - 1)) / 2.0
scalar_complexity /= norm
return numpy.reciprocal(scalar_complexity)
In [2]:
def generate_pattern(nodes_in=8, nodes_out=8, activated=4):
assert activated <= nodes_out
rand_int = numpy.random.randint
rand_arr = numpy.random.random_sample
pattern = numpy.zeros((nodes_out, nodes_in))
for j in xrange(nodes_in):
rows = set()
while len(rows) < activated:
rows.add(rand_int(nodes_out))
values = rand_arr(activated)
values /= values.sum()
for (pos, i) in enumerate(rows):
pattern[i, j] = values[pos]
return pattern
In [3]:
import zlib
In [4]:
import bz2
In [5]:
import lzma
In [6]:
import blosc
In [7]:
%timeit generate_pattern()
In [8]:
pattern = generate_pattern()
strng = pattern.tostring()
In [19]:
plt.matshow(pattern, cmap=plt.cm.gray_r, vmin=0.0, vmax=1.0)
plt.colorbar()
plt.show()
In [9]:
%timeit zlib.compress(strng, 9)
In [10]:
%timeit bz2.compress(strng, 9)
In [11]:
%timeit lzma.compress(strng, options={"level": 9})
In [12]:
%timeit blosc.compress(strng, typesize=pattern.dtype.itemsize)
In [13]:
number = int(1E4)
In [14]:
complexity = numpy.zeros(number, dtype=float)
zlib_res = numpy.zeros(number, dtype=int)
bz2_res = numpy.zeros(number, dtype=int)
lzma_res = numpy.zeros(number, dtype=int)
blosc_res = numpy.zeros(number, dtype=int)
In [15]:
for i in xrange(number):
pattern = generate_pattern()
complexity[i] = pattern_complexity(pattern)
strng = pattern.tostring()
zlib_res[i] = len(zlib.compress(strng, 9))
bz2_res[i] = len(bz2.compress(strng, 9))
lzma_res[i] = len(lzma.compress(strng, options={"level": 9}))
blosc_res[i] = len(blosc.compress(strng, typesize=pattern.dtype.itemsize))
In [16]:
cpalette = ["#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7"]
In [17]:
plt.scatter(complexity, zlib_res, label="zlib", c=cpalette[0], marker="^")
plt.scatter(complexity, bz2_res, label="bz2", c=cpalette[1], marker="v")
plt.scatter(complexity, lzma_res, label="lzma", c=cpalette[2], marker="p")
plt.scatter(complexity, blosc_res, label="blosc", c=cpalette[3], marker="s")
plt.xlabel("Scalar Complexity")
plt.ylabel("Length of Compressed Byte-String")
plt.axhline(len(strng), c="red", label="uncompressed")
plt.legend()
plt.show()