In [2]:
// Node core modules (fs/path are used by the benchmark loops below)
var fs = require('fs')
var path = require('path')
// Third-party
var Immutable = require('immutable')
var _ = require('lodash')
var commutable = require('commutable')
JSON.parse takes an extra argument called a reviver:
JSON.parse(text[, reviver])
The reviver accepts two parameters, key and value, and returns the intended value. The key will either be a string key on Objects or a numeric index when the value is in an Array.
Let's walk through some sample code to check this out.
In [3]:
// Classic JSON.parse
// With no reviver, every value comes back as a plain mutable Object/Array.
JSON.parse('{"a": 2, "b": { "name": "dave" }}')
Out[3]:
In [4]:
// Reviver that suffixes every value stored under a "name" key with
// " senior", and passes all other values through untouched.
function reviver(key, value) {
  return key === 'name' ? value + " senior" : value;
}

JSON.parse('{"a": 2, "b": { "name": "dave" }}', reviver)
Out[4]:
This means you can use this to change values based on a key, though you won't know the nested path of the overall JSON object.
Since the string is (expected to be) JSON, there are only two types which are not immutable: Array and Object. You can use this to your advantage to create frozen or Immutable.js objects while parsing.
In [5]:
JSON.parse('{"a": 2, "b": { "name": "dave" }}', (k, v) => Object.freeze(v))
Out[5]:
In [6]:
// Reviver that builds an Immutable.js structure directly during parsing:
// Arrays become Immutable.List, plain objects become Immutable.Map, and
// primitives (including null) pass through unchanged.
function immutableReviver(key, value) {
  if (Array.isArray(value)) {
    return Immutable.List(value);
  }
  // typeof null === 'object', so guard against null explicitly —
  // otherwise JSON nulls would silently turn into empty Maps.
  if (value !== null && typeof value === 'object') {
    return Immutable.Map(value);
  }
  return value;
}
Since it seemed handy enough, I put immutable-reviver on npm. We'll just use the version written here for now though.
In [7]:
revived = JSON.parse('{"a": 2, "b": { "name": "dave" }}', immutableReviver)
Out[7]:
In [8]:
revived.getIn(['b', 'name'])
Out[8]:
The reason I started looking into this was because I was trying to see if I could optimize loading of notebooks in nteract. We currently rely on a strategy that goes like:
notebook = JSON.parse(rawNotebook)
immutableNotebook = Immutable.fromJS(notebook)
ourNotebook = immutableNotebook.map(...).map(...)... // A series of transformations to create our in-memory representation
These transformations are mostly to turn notebook cells from this:
{
"metadata": {
"collapsed": false,
"outputExpanded": false
},
"cell_type": "markdown",
"source": [
"# Outputs you can update by name\n",
"\n",
"This notebook demonstrates the new name-based display functionality in the notebook. Previously, notebooks could only attach output to the cell that was currently being executed:\n",
"\n"
]
}
into:
{
"metadata": {
"collapsed": false,
"outputExpanded": false
},
"cell_type": "markdown",
"source": "# Outputs you can update by name\n\nThis notebook demonstrates the new name-based display functionality in the notebook. Previously, notebooks could only attach output to the cell that was currently being executed:\n\n"
}
This multi-line string format, introduced by Jupyter, is to accommodate diffing of notebooks in tools like git and GitHub. It's applied to source on cells as well as some output types.
We can set up a reviver that handles all the keys that are most likely to have multi-line strings. We'll start with those that are media types that we know end up being encoded as an array of strings.
In [9]:
// Media types whose values the notebook format encodes as arrays of
// strings ("multi-line strings") that should be joined back into one string.
var multilineStringMimetypes = new Set([
  'application/javascript',
  'text/html',
  'text/markdown',
  'text/latex',
  'image/svg+xml',
  'image/gif',
  'image/png',
  'image/jpeg',
  'application/pdf',
  'text/plain',
]);

// Notebook-aware reviver: joins multi-line-string mimetype arrays into a
// single string; otherwise Arrays become Immutable.List and plain objects
// become Immutable.Map. Primitives (including null) pass through unchanged.
function immutableNBReviver(key, value) {
  if (Array.isArray(value)) {
    if (multilineStringMimetypes.has(key)) {
      return value.join('');
    }
    return Immutable.List(value);
  }
  // typeof null === 'object' — without this guard JSON nulls become empty Maps.
  if (value !== null && typeof value === 'object') {
    return Immutable.Map(value);
  }
  return value;
}
We can also set up a "greedy" reviver that converts source and text fields too. The primary problem, though, is that because of how JSON.parse works we have no idea whether a matching key is in a cell where we expect it, part of someone else's JSON payload, or in metadata.
In [10]:
// Superset of the multi-line-string mimetypes plus the notebook's
// 'source' and 'text' fields — any of these keys with an array value
// gets joined into one string.
var specialKeys = new Set([
  'application/javascript',
  'text/html',
  'text/markdown',
  'text/latex',
  'image/svg+xml',
  'image/gif',
  'image/png',
  'image/jpeg',
  'application/pdf',
  'text/plain',
  'source',
  'text',
]);

// "Greedy" reviver: like immutableNBReviver but also joins 'source' and
// 'text' arrays, even though we can't tell from the key alone whether
// the value really is notebook cell content.
function immutableGreedyReviver(key, value) {
  if (Array.isArray(value)) {
    if (specialKeys.has(key)) {
      return value.join('');
    }
    return Immutable.List(value);
  }
  // typeof null === 'object' — without this guard JSON nulls become empty Maps.
  if (value !== null && typeof value === 'object') {
    return Immutable.Map(value);
  }
  return value;
}
In [11]:
// Some logger that uses process.hrtime that I ripped off Stack Overflow, since we want to use timing in a way that we can't with console.time
// Destructures five values out of a one-shot generator into implicit globals:
//   a   - hrtime tuple captured once here, used as the baseline for all timings
//   o   - process.hrtime itself; o(a) yields elapsed [seconds, nanoseconds] since a
//   ms  - converts an hrtime [seconds, nanoseconds] tuple to milliseconds
//   s   - converts milliseconds to seconds
//   log - snapshots elapsed time since `a` as { a, o, ms, s }
// NOTE(review): the bare destructuring assignment creates implicit globals,
// which would throw in strict mode — kept byte-identical here.
[ a, o, ms, s, log ] = ( function * () {
yield * [
( process.hrtime )(),
process.hrtime,
// [seconds, nanoseconds] -> milliseconds
ms => ( ( ms[ 0 ] * 1e9 + ms[ 1 ] ) / 1000000 ),
s => s / 1000,
() => {
const f = o( a ), msf = ms( f ), sf = s( msf );
return { a, o: f, ms: msf, s: sf };
}
];
} )();
Out[11]:
In [12]:
// Calculate the milliseconds it takes to run f once, using the global
// `log` timer defined above.
function measure(f) {
  // `const` replaces the original implicit globals `start`/`end`, which
  // leak state between calls and throw outright in strict mode.
  const start = log();
  f();
  const end = log();
  return end.ms - start.ms;
}

// Measure the function run n times; return the mean duration in ms.
function runTrials(f, n = 1000) {
  const values = [];
  for (let ii = 0; ii < n; ii++) {
    values.push(measure(f));
  }
  return values.reduce((a, b) => a + b, 0) / n;
}
With our harness all set up, we can run through all the notebooks we have locally to see how they perform with different revivers.
In [13]:
notebooks = require('glob').sync('./*.ipynb')
Out[13]:
In [14]:
// Benchmark each reviver strategy against every local notebook,
// printing the mean of 100 trials per strategy.
for (var notebookPath of notebooks) {
  console.log("\n ----- ", path.basename(notebookPath))
  // `var` on raw/mean fixes the original implicit globals.
  var raw = fs.readFileSync(notebookPath)
  var tests = [
    { name: 'straight JSON.parse', f: () => { JSON.parse(raw) } },
    { name: 'Object.freeze', f: () => { JSON.parse(raw, (k, v) => Object.freeze(v)) } },
    { name: 'basic Immutable', f: () => { JSON.parse(raw, immutableReviver) } },
    { name: 'immutable notebook', f: () => { JSON.parse(raw, immutableNBReviver) } },
    { name: 'immutable greedy nb', f: () => { JSON.parse(raw, immutableGreedyReviver) } },
    // { name: 'fromJS', f: () => { JSON.parse(raw, (k, v) => Immutable.fromJS(v)) } },
    // { name: 'current commutable way', f: () => { commutable.fromJS(JSON.parse(raw)) } },
  ]
  for (var test of tests) {
    var mean = runTrials(test.f, 100)
    console.log(_.padEnd(test.name, 30), mean)
  }
}
Within nteract we are inevitably going to end up creating an immutable structure. These measurements only make sense in the context of running both the initial JSON.parse followed by the transformations. To give it a rough guess, I'll only compare a few I can evaluate.
In [15]:
// Compare the parse-only baselines against the full parse+convert
// pipelines, printing mean time (truncated to 10 chars) per notebook.
for (var notebookPath of notebooks) {
  console.log("\n ----- ", path.basename(notebookPath))
  // `var` on raw/mean fixes the original implicit globals.
  var raw = fs.readFileSync(notebookPath)
  var tests = [
    { name: 'straight JSON.parse baseline', f: () => { JSON.parse(raw) } },
    { name: 'Object.freeze baseline', f: () => { JSON.parse(raw, (k,v) => Object.freeze(v)) } },
    { name: 'straight JSON.parse then commutable conversion', f: () => { commutable.fromJS(JSON.parse(raw)) } },
    { name: 'immutable greedy nb', f: () => { JSON.parse(raw, immutableGreedyReviver) } },
  ]
  for (var test of tests) {
    var mean = runTrials(test.f, 100)
    console.log(_.padEnd(test.name, 50), mean.toString().slice(0,10), 'ms')
  }
}
Since these are in milliseconds and the difference is not much, it seems like maybe this doesn't need to be optimized. In the case of the altair notebook, which has a pretty big JSON structure inside (and only one!), perhaps it would make sense if some of our structure is frozen objects (don't force vega payloads to be Immutable Maps).
----- altair.ipynb
straight JSON.parse baseline 1.10996391 ms
Object.freeze baseline 2.29745900 ms
straight JSON.parse then commutable conversion 6.84918417 ms
immutable greedy nb 5.85418076 ms
In [ ]:
In [ ]: