In [7]:
// Load the EclairJS SparkContext constructor and create the context (`sc`)
// that the following cells use to build RDDs.
var SparkContext = require('eclairjs/SparkContext');
// NOTE(review): the two arguments look like the Spark master string and the
// application name — confirm against the EclairJS SparkContext API.
var sc = new SparkContext("local[*]", "JavaScript word count");
In [8]:
// Read the input text file into an RDD and mark it as cached; later cells
// derive every other RDD from `rdd`.
var file = "../data/dream.txt";
var rdd = sc.textFile(file).cache();
// Final expression of the cell: the element count of `rdd`
// (presumably one element per line of the file — TODO confirm).
rdd.count();
Out[8]:
In [9]:
// Break every element of `rdd` into space-separated pieces; flatMap is
// expected to flatten the per-element arrays into one RDD of tokens.
var rdd2 = rdd.flatMap(function(line) {
  var pieces = line.split(" ");
  return pieces;
});
In [10]:
// Keep only tokens that still contain characters after trimming whitespace
// (splitting on a single space leaves empty strings behind for runs of
// spaces and for blank lines).
var rdd3 = rdd2.filter(function(token) {
  var stripped = token.trim();
  return stripped.length > 0;
});
In [11]:
// Turn each token into a (token, 1) pair so the counts can be summed per key.
// NOTE(review): Tuple2 is required inside the callback rather than at cell
// level — presumably because the function runs outside this cell's scope;
// confirm before hoisting it.
var rdd4 = rdd3.mapToPair(function(token) {
  var Tuple2 = require('eclairjs/Tuple2');
  var initialCount = 1;
  var pair = new Tuple2(token, initialCount);
  return pair;
});
In [12]:
// Combine the values of pairs that share a key by adding them, giving one
// (token, total) pair per distinct token.
var rdd5 = rdd4.reduceByKey(function(runningTotal, nextCount) {
  var combined = runningTotal + nextCount;
  return combined;
});
In [13]:
// Swap each (word, count) pair into (count, word) so the pairs can be
// ordered by count with sortByKey.
var rdd6 = rdd5.mapToPair(function(tuple) {
  // Declared with `var` so the callback does not assign an implicit global
  // (matches how Tuple2 is loaded in the earlier mapToPair callback).
  var Tuple2 = require('eclairjs/Tuple2');
  // NOTE(review): the `+ 0.0` is kept as-is; presumably it forces the count
  // to be handled as a floating-point key — confirm before removing.
  return new Tuple2(tuple._2() + 0.0, tuple._1());
});
In [14]:
// Sort the (count, word) pairs by key; the `false` argument presumably
// requests descending order (highest counts first) — TODO confirm.
var rdd7 = rdd6.sortByKey(false);
// Final expression of the cell: the first 10 pairs serialized as JSON text.
JSON.stringify(rdd7.take(10))
Out[14]:
In [ ]: