In [7]:
// Load the EclairJS SparkContext constructor and create the context that the
// following cells use as `sc`. The two arguments follow Spark's
// SparkContext(master, appName) signature.
var SparkContext = require('eclairjs/SparkContext');
var sc = new SparkContext("local[*]", "JavaScript word count");

In [8]:
// Relative path of the input text file.
var file = "../data/dream.txt";

// Read the file into an RDD and cache it: it is counted here and reused by the next cell.
var rdd = sc.textFile(file).cache();
// Last expression of the cell; its value (the RDD's element count) is what the cell displays.
rdd.count();


Out[8]:
119

In [9]:
// Break every element of `rdd` on single spaces and flatten the pieces into one RDD.
// Consecutive spaces produce empty strings here.
var rdd2 = rdd.flatMap(function(line) {
    var pieces = line.split(" ");
    return pieces;
});

In [10]:
// Keep only the entries that still contain something after trimming whitespace.
var rdd3 = rdd2.filter(function(token) {
    var trimmed = token.trim();
    return trimmed.length > 0;
});

In [11]:
// Turn each word into a (word, 1) pair so occurrences can be summed per word.
var rdd4 = rdd3.mapToPair(function(token) {
    // Tuple2 is required inside the callback, as in the original.
    // NOTE(review): presumably because the callback is shipped to Spark workers — confirm.
    var Pair = require('eclairjs/Tuple2');
    var initialCount = 1;
    return new Pair(token, initialCount);
});

In [12]:
// Combine the values that share a key by adding them together.
var rdd5 = rdd4.reduceByKey(function(total, next) {
    var sum = total + next;
    return sum;
});

In [13]:
// Swap each pair from `rdd5` so that its second component becomes the key and
// its first component becomes the value; the next cell sorts on that key.
var rdd6 = rdd5.mapToPair(function(tuple) {
    // Declared with `var`: the bare assignment used before created an implicit
    // global (and would throw a ReferenceError under strict mode).
    var Tuple2 = require('eclairjs/Tuple2');
    // The `+0.0` is kept from the original.
    // NOTE(review): presumably it forces a floating-point key — confirm.
    return new Tuple2(tuple._2()+0.0, tuple._1());
});

In [14]:
// Sort the pairs by key; with `false` the result is in descending order, as the cell output shows.
var rdd7 = rdd6.sortByKey(false);
// Last expression of the cell: the first ten pairs, serialized to JSON for display.
JSON.stringify(rdd7.take(10))


Out[14]:
[{"0":34,"1":"of","length":2},{"0":30,"1":"the","length":2},{"0":19,"1":"be","length":2},{"0":19,"1":"to","length":2},{"0":19,"1":"and","length":2},{"0":15,"1":"will","length":2},{"0":12,"1":"from","length":2},{"0":12,"1":"I","length":2},{"0":11,"1":"freedom","length":2},{"0":10,"1":"that","length":2}]

In [ ]: