In [5]:
// Bootstrap EclairJS: a local SparkContext using all available cores
// ("local[*]") named "JavaScriptSparkSQL", plus a SQLContext that
// later cells use for DataFrame creation and SQL queries.
var SparkContext = require('eclairjs/SparkContext');
var sc = new SparkContext("local[*]", "JavaScriptSparkSQL");
var SQLContext = require('eclairjs/sql/SQLContext');
var sqlContext = new SQLContext(sc);

In [6]:
// Load a text file and convert each line to a JavaScript object with
// `name` and `age` fields. Input lines look like "Michael, 29".
var people = sc.textFile("../data/people.txt").map(function(line) {
	var parts = line.split(",");
	// Return a plain object (no implicit-global assignment); trim()
	// removes the space after the comma, and an explicit radix of 10
	// keeps parseInt from misreading padded values.
	return {
		name: parts[0],
		age: parseInt(parts[1].trim(), 10)
	};
});

In [8]:
var DataTypes = require('eclairjs/sql/types/DataTypes');

// Build the schema programmatically: two nullable columns,
// "name" (string) and "age" (integer).
var fields = [
	DataTypes.createStructField("name", DataTypes.StringType, true),
	DataTypes.createStructField("age", DataTypes.IntegerType, true)
];
var schema = DataTypes.createStructType(fields);


In [9]:
// Convert records of the RDD (people) to Rows so the schema can be
// applied. Row field order must match the schema: [name, age].
var rowRDD = people.map(function(person) {
	var RowFactory = require('eclairjs/sql/RowFactory');
	return RowFactory.create([person.name, person.age]);
});

In [10]:
// Apply the schema to the RDD of Rows, yielding a DataFrame.
var peopleDataFrame = sqlContext.createDataFrame(rowRDD, schema);

// Register the DataFrame as a temp table named "people" so the
// SQL query in the next cell ("SELECT name FROM people") can find it.
peopleDataFrame.registerTempTable("people");

In [11]:
// SQL can be run over RDDs that have been registered as tables.
var results = sqlContext.sql("SELECT name FROM people");

// The results of SQL queries are DataFrames and support all the normal
// RDD operations. The columns of a row in the result can be accessed
// by ordinal; map each row to a "Name: <value>" string.
var names = results.toRDD().map(function(row) {
	return "Name: " + row.getString(0);
});

// Collect a small sample to the driver for display
// (produces the output shown below).
JSON.stringify(names.take(10));


["Name: Michael","Name: Andy","Name: Justin"]

In [ ]: