In [1]:
import pickle
import cPickle
import json
import yaml
import beanstalkc
import umsgpack
from uuid import uuid4
from yaml import (CLoader, CDumper)
By default, beanstalkc uses PyYAML to turn strings into Python objects. beanstalkd can only transmit strings, so we have a couple of options for serializing Python objects to strings and back again. Note that both pickle (through cPickle) and yaml (via libyaml) have C implementations, and that JSON is limited to basic types.
First start a local beanstalkd queue:
beanstalkd -l localhost
or adjust the connection arguments in the following cells to match your setup.
In [2]:
# Connection with beanstalkc's default settings (YAML responses are parsed by PyYAML).
queue = beanstalkc.Connection(host="127.0.0.1")
In [3]:
# Second connection whose YAML responses are parsed with the libyaml-backed
# CLoader. beanstalkc calls `parse_yaml(body)` on each YAML response, so it
# must be given a function that returns the parsed data; handing it the
# `CLoader` class itself would only construct a loader object and never parse
# the response.
lib_q = beanstalkc.Connection(
    host="127.0.0.1",
    parse_yaml=lambda body: yaml.load(body, Loader=CLoader),
)
Pure input/output using PyYAML with and without libyaml does not affect speed since the transmitted strings are not touched.
In [4]:
# Small YAML document (a five-item list) used as a plain-string job body.
# It starts with a newline, followed by one "- item" line per entry.
content = "\n" + "".join(
    "- {0}\n".format(word) for word in ("This", "is", "a", "yaml", "list")
)
In [5]:
%%timeit
# Time one put/reserve/delete cycle of the plain string on the default connection.
queue.put(content)
job = queue.reserve()
job.delete()
In [6]:
%%timeit
# Same put/reserve/delete cycle, this time on the connection configured with CLoader.
lib_q.put(content)
job = lib_q.reserve()
job.delete()
Getting information from beanstalkd, however, is sped up quite a bit.
In [7]:
# Time a server statistics request on the default connection.
%timeit queue.stats()
In [8]:
# Time the same statistics request on the connection configured with CLoader.
%timeit lib_q.stats()
Next, build a larger job description that needs to be transformed into a string before it can be queued.
In [9]:
# Random UUID, converted to a string, used as the "id" of the example job.
unique_id = str(uuid4())
unique_id
Out[9]:
In [10]:
# Example job description used by every serializer benchmark below. It mixes
# strings, an integer, a float, tuples, booleans, None and nested containers.
job_desc = {
"name": "fitzroy",
"id": unique_id,
"discrete": 2435466673,
"real": 34.23552,
"tuple": (43, 34),
"dict": {"yes": True, "no": False, "I don't know": None},
"list": [4, 2, None, (True, False), [234.2534, "yay"]]
}
In [11]:
# Round trip with the pure-Python pickle module: serialize, queue, reserve,
# deserialize, delete the job, then display the restored object.
# NOTE: only unpickle job bodies from a queue you trust.
p_dump = pickle.dumps(job_desc, protocol=pickle.HIGHEST_PROTOCOL)
queue.put(p_dump)
job = queue.reserve()
p_load = pickle.loads(job.body)
job.delete()
p_load
Out[11]:
In [12]:
# Length of the pickled payload.
len(p_dump)
Out[12]:
In [13]:
# Serialization speed of pickle alone (no queue involved).
%timeit pickle.dumps(job_desc, protocol=pickle.HIGHEST_PROTOCOL)
In [14]:
# Deserialization speed of pickle alone (no queue involved).
%timeit pickle.loads(p_dump)
In [15]:
%%timeit
# Full cycle with pickle: serialize, put, reserve, deserialize, delete.
queue.put(pickle.dumps(job_desc, protocol=pickle.HIGHEST_PROTOCOL))
job = queue.reserve()
p_load = pickle.loads(job.body)
job.delete()
In [16]:
# Round trip with cPickle, the C implementation of pickle. The protocol
# constant is taken from the `pickle` module, as in the pickle cells.
# NOTE: only unpickle job bodies from a queue you trust.
cp_dump = cPickle.dumps(job_desc, protocol=pickle.HIGHEST_PROTOCOL)
queue.put(cp_dump)
job = queue.reserve()
cp_load = cPickle.loads(job.body)
job.delete()
cp_load
Out[16]:
In [17]:
# Length of the cPickle payload.
len(cp_dump)
Out[17]:
In [18]:
# Serialization speed of cPickle alone (no queue involved).
%timeit cPickle.dumps(job_desc, protocol=pickle.HIGHEST_PROTOCOL)
In [19]:
# Deserialization speed of cPickle alone (no queue involved).
%timeit cPickle.loads(cp_dump)
In [20]:
%%timeit
# Full cycle with cPickle: serialize, put, reserve, deserialize, delete.
queue.put(cPickle.dumps(job_desc, protocol=pickle.HIGHEST_PROTOCOL))
job = queue.reserve()
cp_load = cPickle.loads(job.body)
job.delete()
In [21]:
# Round trip with JSON: serialize, queue, reserve, deserialize, delete the
# job, then display the restored object.
# NOTE: JSON has no tuple type, so the tuples in job_desc come back as lists.
j_dump = json.dumps(job_desc)
queue.put(j_dump)
job = queue.reserve()
j_load = json.loads(job.body)
job.delete()
j_load
Out[21]:
In [22]:
# Length of the JSON payload.
len(j_dump)
Out[22]:
In [23]:
# Serialization speed of json alone (no queue involved).
%timeit json.dumps(job_desc)
In [24]:
# Deserialization speed of json alone (no queue involved).
%timeit json.loads(j_dump)
In [25]:
%%timeit
# Full cycle with JSON: serialize, put, reserve, deserialize, delete.
queue.put(json.dumps(job_desc))
job = queue.reserve()
j_load = json.loads(job.body)
job.delete()
In [26]:
# Round trip with umsgpack (MessagePack): serialize, queue, reserve,
# deserialize, delete the job, then display the restored object.
u_dump = umsgpack.dumps(job_desc)
queue.put(u_dump)
job = queue.reserve()
u_load = umsgpack.loads(job.body)
job.delete()
u_load
Out[26]:
In [27]:
# Length of the umsgpack payload.
len(u_dump)
Out[27]:
In [28]:
# Serialization speed of umsgpack alone (no queue involved).
%timeit umsgpack.dumps(job_desc)
In [29]:
# Deserialization speed of umsgpack alone (no queue involved).
%timeit umsgpack.loads(u_dump)
In [30]:
%%timeit
# Full cycle with umsgpack: serialize, put, reserve, deserialize, delete.
queue.put(umsgpack.dumps(job_desc))
job = queue.reserve()
u_load = umsgpack.loads(job.body)
job.delete()
In [31]:
# Round trip with pure-Python PyYAML: serialize, queue, reserve, deserialize,
# delete the job, then display the restored object.
# The loader is named explicitly: calling yaml.load() without one is
# deprecated in newer PyYAML releases and rejected by the latest ones. The
# full (non-safe) Loader is needed because the dumped tuples use
# Python-specific YAML tags.
# NOTE: the full Loader can construct arbitrary Python objects; only load job
# bodies from a queue you trust.
y_dump = yaml.dump(job_desc)
queue.put(y_dump)
job = queue.reserve()
y_load = yaml.load(job.body, Loader=yaml.Loader)
job.delete()
y_load
Out[31]:
In [32]:
# Length of the YAML payload.
len(y_dump)
Out[32]:
In [33]:
# Serialization speed of pure-Python PyYAML alone (no queue involved).
%timeit yaml.dump(job_desc)
In [34]:
%timeit yaml.load(y_dump)
In [35]:
%%timeit
queue.put(yaml.dump(job_desc))
job = queue.reserve()
y_load = yaml.load(job.body)
job.delete()
In [36]:
# Round trip with the libyaml-backed CDumper/CLoader: serialize, queue,
# reserve, deserialize, delete the job, then display the restored object.
cy_dump = yaml.dump(job_desc, Dumper=CDumper)
queue.put(cy_dump)
job = queue.reserve()
cy_load = yaml.load(job.body, Loader=CLoader)
job.delete()
cy_load
Out[36]:
In [37]:
# Length of the YAML payload produced by CDumper.
len(cy_dump)
Out[37]:
In [38]:
# Serialization speed of PyYAML with CDumper alone (no queue involved).
%timeit yaml.dump(job_desc, Dumper=CDumper)
In [39]:
%timeit yaml.load(job.body, Loader=CLoader)
In [40]:
%%timeit
queue.put(yaml.dump(job_desc, Dumper=CDumper))
job = queue.reserve()
cy_load = yaml.load(job.body, Loader=CLoader)
job.delete()
Speed-wise, cPickle seems to be the clear winner, with JSON, surprisingly, a close second. Unlike JSON, cPickle also allows serialization of almost any kind of Python object. Size-wise, umsgpack wins, but due to its pure-Python implementation (as of today) it cannot compete in speed.