In [1]:
# Register the MongoDB collection `students` (database `highschool`, host
# `mongo:27017`) as the temporary Spark SQL table `students_table`, using the
# Stratio MongoDB datasource.
sqlContext.sql(
    "CREATE TEMPORARY TABLE students_table USING com.stratio.datasource.mongodb OPTIONS (host 'mongo:27017', database 'highschool', collection 'students')"
)

# Preview at most 15 rows where sex='F' and Mjob='teacher'; the LIMIT keeps
# the collect() on the driver small.
sqlContext.sql(
    "SELECT * FROM students_table where sex='F' and Mjob='teacher' LIMIT 15"
).collect()


Out[1]:
[Row(famsize=u'GT3', traveltime=u'1', guardian=u'mother', Mjob=u'teacher', failures=u'0', studytime=u'2', health=u'2', G3=u'14', address=u'U', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'14', sex=u'F', _id=u'574e5fd1f8374e03e66d4f4d', absences=u'2', Dalc=u'1', school=u'GP', goout=u'3', Fjob=u'health', famrel=u'3', age=u'15', paid=u'no', Walc=u'2', schoolsup=u'no', freetime=u'3', G1=u'14', Fedu=u'4', reason=u'reputation', romantic=u'no', activities=u'no', nursery=u'yes', Medu=u'4', famsup=u'yes'),
 Row(famsize=u'GT3', traveltime=u'1', guardian=u'mother', Mjob=u'teacher', failures=u'0', studytime=u'3', health=u'4', G3=u'13', address=u'U', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'13', sex=u'F', _id=u'574e5fd1f8374e03e66d4f82', absences=u'0', Dalc=u'2', school=u'GP', goout=u'4', Fjob=u'health', famrel=u'3', age=u'16', paid=u'no', Walc=u'4', schoolsup=u'yes', freetime=u'4', G1=u'14', Fedu=u'3', reason=u'home', romantic=u'no', activities=u'yes', nursery=u'yes', Medu=u'4', famsup=u'yes'),
 Row(famsize=u'LE3', traveltime=u'3', guardian=u'mother', Mjob=u'teacher', failures=u'0', studytime=u'2', health=u'1', G3=u'16', address=u'U', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'15', sex=u'F', _id=u'574e5fd1f8374e03e66d4f84', absences=u'2', Dalc=u'1', school=u'GP', goout=u'3', Fjob=u'services', famrel=u'5', age=u'16', paid=u'no', Walc=u'2', schoolsup=u'no', freetime=u'4', G1=u'16', Fedu=u'3', reason=u'course', romantic=u'no', activities=u'yes', nursery=u'yes', Medu=u'4', famsup=u'yes'),
 Row(famsize=u'GT3', traveltime=u'2', guardian=u'mother', Mjob=u'teacher', failures=u'0', studytime=u'2', health=u'1', G3=u'14', address=u'U', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'14', sex=u'F', _id=u'574e5fd1f8374e03e66d4fa0', absences=u'2', Dalc=u'1', school=u'GP', goout=u'3', Fjob=u'services', famrel=u'5', age=u'16', paid=u'no', Walc=u'1', schoolsup=u'no', freetime=u'3', G1=u'13', Fedu=u'2', reason=u'home', romantic=u'no', activities=u'yes', nursery=u'yes', Medu=u'4', famsup=u'yes'),
 Row(famsize=u'GT3', traveltime=u'2', guardian=u'mother', Mjob=u'teacher', failures=u'0', studytime=u'1', health=u'5', G3=u'14', address=u'U', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'14', sex=u'F', _id=u'574e5fd1f8374e03e66d4fd6', absences=u'6', Dalc=u'1', school=u'GP', goout=u'2', Fjob=u'teacher', famrel=u'4', age=u'15', paid=u'no', Walc=u'1', schoolsup=u'no', freetime=u'3', G1=u'13', Fedu=u'4', reason=u'course', romantic=u'no', activities=u'yes', nursery=u'yes', Medu=u'4', famsup=u'no'),
 Row(famsize=u'GT3', traveltime=u'1', guardian=u'mother', Mjob=u'teacher', failures=u'0', studytime=u'3', health=u'5', G3=u'13', address=u'U', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'13', sex=u'F', _id=u'574e5fd1f8374e03e66d4fdc', absences=u'2', Dalc=u'1', school=u'GP', goout=u'2', Fjob=u'services', famrel=u'4', age=u'15', paid=u'no', Walc=u'1', schoolsup=u'no', freetime=u'2', G1=u'13', Fedu=u'4', reason=u'course', romantic=u'no', activities=u'yes', nursery=u'yes', Medu=u'4', famsup=u'yes'),
 Row(famsize=u'LE3', traveltime=u'1', guardian=u'mother', Mjob=u'teacher', failures=u'0', studytime=u'2', health=u'3', G3=u'11', address=u'U', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'10', sex=u'F', _id=u'574e5fd1f8374e03e66d5018', absences=u'0', Dalc=u'1', school=u'GP', goout=u'2', Fjob=u'teacher', famrel=u'4', age=u'16', paid=u'no', Walc=u'2', schoolsup=u'no', freetime=u'5', G1=u'11', Fedu=u'4', reason=u'reputation', romantic=u'no', activities=u'no', nursery=u'yes', Medu=u'4', famsup=u'yes'),
 Row(famsize=u'GT3', traveltime=u'1', guardian=u'mother', Mjob=u'teacher', failures=u'0', studytime=u'2', health=u'1', G3=u'15', address=u'U', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'15', sex=u'F', _id=u'574e5fd1f8374e03e66d5020', absences=u'4', Dalc=u'1', school=u'GP', goout=u'2', Fjob=u'other', famrel=u'1', age=u'16', paid=u'no', Walc=u'1', schoolsup=u'no', freetime=u'3', G1=u'14', Fedu=u'3', reason=u'other', romantic=u'yes', activities=u'yes', nursery=u'yes', Medu=u'4', famsup=u'no'),
 Row(famsize=u'GT3', traveltime=u'2', guardian=u'mother', Mjob=u'teacher', failures=u'0', studytime=u'3', health=u'4', G3=u'12', address=u'R', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'12', sex=u'F', _id=u'574e5fd1f8374e03e66d5022', absences=u'0', Dalc=u'1', school=u'GP', goout=u'2', Fjob=u'other', famrel=u'4', age=u'17', paid=u'no', Walc=u'1', schoolsup=u'no', freetime=u'4', G1=u'11', Fedu=u'3', reason=u'reputation', romantic=u'yes', activities=u'yes', nursery=u'yes', Medu=u'4', famsup=u'yes'),
 Row(famsize=u'GT3', traveltime=u'1', guardian=u'mother', Mjob=u'teacher', failures=u'0', studytime=u'3', health=u'5', G3=u'16', address=u'U', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'16', sex=u'F', _id=u'574e5fd1f8374e03e66d5031', absences=u'4', Dalc=u'1', school=u'GP', goout=u'2', Fjob=u'services', famrel=u'5', age=u'16', paid=u'no', Walc=u'1', schoolsup=u'no', freetime=u'3', G1=u'15', Fedu=u'4', reason=u'home', romantic=u'no', activities=u'yes', nursery=u'no', Medu=u'4', famsup=u'yes'),
 Row(famsize=u'LE3', traveltime=u'1', guardian=u'mother', Mjob=u'teacher', failures=u'0', studytime=u'2', health=u'4', G3=u'14', address=u'U', internet=u'no', Pstatus=u'A', higher=u'yes', G2=u'14', sex=u'F', _id=u'574e5fd1f8374e03e66d504d', absences=u'4', Dalc=u'1', school=u'GP', goout=u'2', Fjob=u'other', famrel=u'4', age=u'17', paid=u'no', Walc=u'1', schoolsup=u'no', freetime=u'3', G1=u'15', Fedu=u'2', reason=u'course', romantic=u'no', activities=u'yes', nursery=u'yes', Medu=u'4', famsup=u'yes'),
 Row(famsize=u'GT3', traveltime=u'1', guardian=u'mother', Mjob=u'teacher', failures=u'0', studytime=u'2', health=u'5', G3=u'12', address=u'U', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'11', sex=u'F', _id=u'574e5fd1f8374e03e66d5052', absences=u'2', Dalc=u'1', school=u'GP', goout=u'1', Fjob=u'services', famrel=u'5', age=u'17', paid=u'yes', Walc=u'4', schoolsup=u'no', freetime=u'3', G1=u'11', Fedu=u'4', reason=u'course', romantic=u'yes', activities=u'no', nursery=u'yes', Medu=u'4', famsup=u'yes'),
 Row(famsize=u'LE3', traveltime=u'1', guardian=u'mother', Mjob=u'teacher', failures=u'0', studytime=u'4', health=u'4', G3=u'17', address=u'U', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'15', sex=u'F', _id=u'574e5fd1f8374e03e66d5064', absences=u'2', Dalc=u'1', school=u'GP', goout=u'3', Fjob=u'services', famrel=u'4', age=u'17', paid=u'no', Walc=u'1', schoolsup=u'no', freetime=u'2', G1=u'14', Fedu=u'2', reason=u'reputation', romantic=u'no', activities=u'yes', nursery=u'yes', Medu=u'4', famsup=u'yes'),
 Row(famsize=u'GT3', traveltime=u'2', guardian=u'mother', Mjob=u'teacher', failures=u'0', studytime=u'2', health=u'4', G3=u'12', address=u'R', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'11', sex=u'F', _id=u'574e5fd1f8374e03e66d5077', absences=u'8', Dalc=u'2', school=u'GP', goout=u'4', Fjob=u'other', famrel=u'4', age=u'18', paid=u'no', Walc=u'2', schoolsup=u'no', freetime=u'3', G1=u'10', Fedu=u'4', reason=u'reputation', romantic=u'no', activities=u'yes', nursery=u'yes', Medu=u'4', famsup=u'no'),
 Row(famsize=u'GT3', traveltime=u'1', guardian=u'mother', Mjob=u'teacher', failures=u'0', studytime=u'1', health=u'5', G3=u'17', address=u'R', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'16', sex=u'F', _id=u'574e5fd1f8374e03e66d50a7', absences=u'2', Dalc=u'1', school=u'GP', goout=u'4', Fjob=u'teacher', famrel=u'4', age=u'17', paid=u'no', Walc=u'1', schoolsup=u'no', freetime=u'4', G1=u'15', Fedu=u'4', reason=u'course', romantic=u'no', activities=u'yes', nursery=u'yes', Medu=u'4', famsup=u'no')]

In [2]:
# Load the same MongoDB collection through the DataFrame reader API instead of
# a SQL statement.
df = (
    sqlContext.read
    .format('com.stratio.datasource.mongodb')
    .options(
        host='mongo:27017',
        database='highschool',
        collection='students',
    )
    .load()
)

In [3]:
# Show the first 10 rows whose Mjob column equals 'teacher'
# (`where` is an alias of `filter`).
df.where(df.Mjob == 'teacher').take(10)


Out[3]:
[Row(famsize=u'GT3', traveltime=u'1', guardian=u'mother', Mjob=u'teacher', failures=u'0', studytime=u'2', health=u'2', G3=u'14', address=u'U', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'14', sex=u'F', _id=u'574e5fd1f8374e03e66d4f4d', absences=u'2', Dalc=u'1', school=u'GP', goout=u'3', Fjob=u'health', famrel=u'3', age=u'15', paid=u'no', Walc=u'2', schoolsup=u'no', freetime=u'3', G1=u'14', Fedu=u'4', reason=u'reputation', romantic=u'no', activities=u'no', nursery=u'yes', Medu=u'4', famsup=u'yes'),
 Row(famsize=u'GT3', traveltime=u'2', guardian=u'mother', Mjob=u'teacher', failures=u'0', studytime=u'2', health=u'3', G3=u'13', address=u'U', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'12', sex=u'M', _id=u'574e5fd1f8374e03e66d4f50', absences=u'0', Dalc=u'1', school=u'GP', goout=u'3', Fjob=u'other', famrel=u'5', age=u'15', paid=u'no', Walc=u'2', schoolsup=u'no', freetime=u'4', G1=u'12', Fedu=u'3', reason=u'course', romantic=u'no', activities=u'no', nursery=u'yes', Medu=u'4', famsup=u'yes'),
 Row(famsize=u'GT3', traveltime=u'1', guardian=u'mother', Mjob=u'teacher', failures=u'0', studytime=u'2', health=u'1', G3=u'14', address=u'U', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'13', sex=u'M', _id=u'574e5fd1f8374e03e66d4f57', absences=u'0', Dalc=u'1', school=u'GP', goout=u'1', Fjob=u'other', famrel=u'4', age=u'15', paid=u'no', Walc=u'1', schoolsup=u'no', freetime=u'4', G1=u'12', Fedu=u'3', reason=u'reputation', romantic=u'no', activities=u'no', nursery=u'yes', Medu=u'4', famsup=u'no'),
 Row(famsize=u'LE3', traveltime=u'1', guardian=u'mother', Mjob=u'teacher', failures=u'0', studytime=u'2', health=u'5', G3=u'14', address=u'U', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'13', sex=u'M', _id=u'574e5fd1f8374e03e66d4f59', absences=u'0', Dalc=u'1', school=u'GP', goout=u'1', Fjob=u'other', famrel=u'4', age=u'16', paid=u'no', Walc=u'3', schoolsup=u'no', freetime=u'5', G1=u'12', Fedu=u'2', reason=u'course', romantic=u'no', activities=u'yes', nursery=u'yes', Medu=u'4', famsup=u'no'),
 Row(famsize=u'GT3', traveltime=u'1', guardian=u'mother', Mjob=u'teacher', failures=u'0', studytime=u'2', health=u'5', G3=u'12', address=u'U', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'11', sex=u'M', _id=u'574e5fd1f8374e03e66d4f60', absences=u'4', Dalc=u'5', school=u'GP', goout=u'5', Fjob=u'teacher', famrel=u'4', age=u'16', paid=u'yes', Walc=u'5', schoolsup=u'no', freetime=u'4', G1=u'12', Fedu=u'4', reason=u'home', romantic=u'yes', activities=u'yes', nursery=u'yes', Medu=u'4', famsup=u'yes'),
 Row(famsize=u'GT3', traveltime=u'1', guardian=u'mother', Mjob=u'teacher', failures=u'0', studytime=u'2', health=u'5', G3=u'15', address=u'R', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'14', sex=u'M', _id=u'574e5fd1f8374e03e66d4f63', absences=u'0', Dalc=u'1', school=u'GP', goout=u'2', Fjob=u'at_home', famrel=u'4', age=u'15', paid=u'no', Walc=u'1', schoolsup=u'no', freetime=u'5', G1=u'13', Fedu=u'3', reason=u'course', romantic=u'yes', activities=u'yes', nursery=u'yes', Medu=u'4', famsup=u'yes'),
 Row(famsize=u'LE3', traveltime=u'1', guardian=u'mother', Mjob=u'teacher', failures=u'0', studytime=u'3', health=u'4', G3=u'14', address=u'U', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'14', sex=u'M', _id=u'574e5fd1f8374e03e66d4f67', absences=u'0', Dalc=u'1', school=u'GP', goout=u'3', Fjob=u'services', famrel=u'5', age=u'15', paid=u'no', Walc=u'1', schoolsup=u'no', freetime=u'4', G1=u'14', Fedu=u'3', reason=u'home', romantic=u'no', activities=u'yes', nursery=u'yes', Medu=u'4', famsup=u'yes'),
 Row(famsize=u'LE3', traveltime=u'1', guardian=u'other', Mjob=u'teacher', failures=u'0', studytime=u'1', health=u'5', G3=u'11', address=u'U', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'11', sex=u'M', _id=u'574e5fd1f8374e03e66d4f6c', absences=u'8', Dalc=u'2', school=u'GP', goout=u'3', Fjob=u'other', famrel=u'5', age=u'15', paid=u'no', Walc=u'4', schoolsup=u'no', freetime=u'4', G1=u'10', Fedu=u'4', reason=u'home', romantic=u'yes', activities=u'no', nursery=u'no', Medu=u'4', famsup=u'yes'),
 Row(famsize=u'GT3', traveltime=u'1', guardian=u'mother', Mjob=u'teacher', failures=u'0', studytime=u'2', health=u'5', G3=u'13', address=u'U', internet=u'no', Pstatus=u'T', higher=u'yes', G2=u'12', sex=u'M', _id=u'574e5fd1f8374e03e66d4f73', absences=u'4', Dalc=u'2', school=u'GP', goout=u'3', Fjob=u'other', famrel=u'4', age=u'15', paid=u'no', Walc=u'2', schoolsup=u'no', freetime=u'3', G1=u'11', Fedu=u'2', reason=u'home', romantic=u'no', activities=u'no', nursery=u'yes', Medu=u'4', famsup=u'yes'),
 Row(famsize=u'GT3', traveltime=u'1', guardian=u'mother', Mjob=u'teacher', failures=u'0', studytime=u'2', health=u'5', G3=u'16', address=u'U', internet=u'no', Pstatus=u'T', higher=u'yes', G2=u'15', sex=u'M', _id=u'574e5fd1f8374e03e66d4f7c', absences=u'8', Dalc=u'1', school=u'GP', goout=u'2', Fjob=u'health', famrel=u'3', age=u'15', paid=u'no', Walc=u'1', schoolsup=u'no', freetime=u'2', G1=u'15', Fedu=u'4', reason=u'reputation', romantic=u'no', activities=u'yes', nursery=u'yes', Medu=u'4', famsup=u'yes')]

In [4]:
# DataFrame of the students_table rows where Fjob='teacher' and romantic='yes';
# later cells write it back to MongoDB.
view = sqlContext.sql(
    "select * from students_table where Fjob='teacher' and romantic='yes'"
)

# Peek at the first 10 matching rows.
view.take(10)


Out[4]:
[Row(famsize=u'GT3', traveltime=u'1', guardian=u'mother', Mjob=u'teacher', failures=u'0', studytime=u'2', health=u'5', G3=u'12', address=u'U', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'11', sex=u'M', _id=u'574e5fd1f8374e03e66d4f60', absences=u'4', Dalc=u'5', school=u'GP', goout=u'5', Fjob=u'teacher', famrel=u'4', age=u'16', paid=u'yes', Walc=u'5', schoolsup=u'no', freetime=u'4', G1=u'12', Fedu=u'4', reason=u'home', romantic=u'yes', activities=u'yes', nursery=u'yes', Medu=u'4', famsup=u'yes'),
 Row(famsize=u'GT3', traveltime=u'2', guardian=u'mother', Mjob=u'other', failures=u'0', studytime=u'3', health=u'5', G3=u'13', address=u'R', internet=u'yes', Pstatus=u'A', higher=u'yes', G2=u'13', sex=u'M', _id=u'574e5fd1f8374e03e66d4f68', absences=u'4', Dalc=u'1', school=u'GP', goout=u'3', Fjob=u'teacher', famrel=u'2', age=u'16', paid=u'no', Walc=u'1', schoolsup=u'no', freetime=u'4', G1=u'13', Fedu=u'4', reason=u'reputation', romantic=u'yes', activities=u'yes', nursery=u'yes', Medu=u'4', famsup=u'yes'),
 Row(famsize=u'GT3', traveltime=u'1', guardian=u'father', Mjob=u'services', failures=u'0', studytime=u'3', health=u'4', G3=u'16', address=u'U', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'16', sex=u'M', _id=u'574e5fd1f8374e03e66d4fa8', absences=u'0', Dalc=u'1', school=u'GP', goout=u'3', Fjob=u'teacher', famrel=u'4', age=u'16', paid=u'no', Walc=u'1', schoolsup=u'no', freetime=u'4', G1=u'16', Fedu=u'4', reason=u'other', romantic=u'yes', activities=u'yes', nursery=u'yes', Medu=u'4', famsup=u'yes'),
 Row(famsize=u'GT3', traveltime=u'2', guardian=u'father', Mjob=u'services', failures=u'0', studytime=u'3', health=u'5', G3=u'12', address=u'R', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'11', sex=u'F', _id=u'574e5fd1f8374e03e66d4fc5', absences=u'0', Dalc=u'2', school=u'GP', goout=u'2', Fjob=u'teacher', famrel=u'4', age=u'15', paid=u'no', Walc=u'2', schoolsup=u'no', freetime=u'2', G1=u'10', Fedu=u'4', reason=u'course', romantic=u'yes', activities=u'no', nursery=u'yes', Medu=u'3', famsup=u'yes'),
 Row(famsize=u'GT3', traveltime=u'4', guardian=u'mother', Mjob=u'at_home', failures=u'0', studytime=u'2', health=u'5', G3=u'11', address=u'R', internet=u'no', Pstatus=u'T', higher=u'yes', G2=u'11', sex=u'M', _id=u'574e5fd1f8374e03e66d4fcd', absences=u'2', Dalc=u'1', school=u'GP', goout=u'3', Fjob=u'teacher', famrel=u'5', age=u'15', paid=u'no', Walc=u'1', schoolsup=u'no', freetime=u'3', G1=u'12', Fedu=u'4', reason=u'course', romantic=u'yes', activities=u'no', nursery=u'yes', Medu=u'3', famsup=u'yes'),
 Row(famsize=u'GT3', traveltime=u'1', guardian=u'mother', Mjob=u'teacher', failures=u'0', studytime=u'1', health=u'5', G3=u'8', address=u'U', internet=u'yes', Pstatus=u'T', higher=u'no', G2=u'9', sex=u'M', _id=u'574e5fd1f8374e03e66d4fe4', absences=u'16', Dalc=u'2', school=u'GP', goout=u'2', Fjob=u'teacher', famrel=u'3', age=u'16', paid=u'no', Walc=u'1', schoolsup=u'no', freetime=u'3', G1=u'9', Fedu=u'4', reason=u'course', romantic=u'yes', activities=u'no', nursery=u'yes', Medu=u'4', famsup=u'yes'),
 Row(famsize=u'GT3', traveltime=u'1', guardian=u'mother', Mjob=u'other', failures=u'0', studytime=u'1', health=u'4', G3=u'13', address=u'U', internet=u'no', Pstatus=u'T', higher=u'yes', G2=u'13', sex=u'F', _id=u'574e5fd1f8374e03e66d4fea', absences=u'0', Dalc=u'1', school=u'GP', goout=u'1', Fjob=u'teacher', famrel=u'4', age=u'17', paid=u'no', Walc=u'1', schoolsup=u'yes', freetime=u'2', G1=u'13', Fedu=u'4', reason=u'course', romantic=u'yes', activities=u'no', nursery=u'yes', Medu=u'4', famsup=u'yes'),
 Row(famsize=u'GT3', traveltime=u'1', guardian=u'mother', Mjob=u'teacher', failures=u'0', studytime=u'2', health=u'2', G3=u'13', address=u'U', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'13', sex=u'M', _id=u'574e5fd1f8374e03e66d5039', absences=u'0', Dalc=u'1', school=u'GP', goout=u'5', Fjob=u'teacher', famrel=u'4', age=u'17', paid=u'no', Walc=u'3', schoolsup=u'yes', freetime=u'5', G1=u'13', Fedu=u'4', reason=u'reputation', romantic=u'yes', activities=u'yes', nursery=u'yes', Medu=u'4', famsup=u'yes'),
 Row(famsize=u'LE3', traveltime=u'1', guardian=u'mother', Mjob=u'teacher', failures=u'0', studytime=u'1', health=u'1', G3=u'17', address=u'U', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'18', sex=u'M', _id=u'574e5fd1f8374e03e66d509b', absences=u'0', Dalc=u'2', school=u'GP', goout=u'2', Fjob=u'teacher', famrel=u'1', age=u'18', paid=u'no', Walc=u'2', schoolsup=u'no', freetime=u'4', G1=u'18', Fedu=u'4', reason=u'home', romantic=u'yes', activities=u'no', nursery=u'yes', Medu=u'4', famsup=u'yes'),
 Row(famsize=u'GT3', traveltime=u'1', guardian=u'mother', Mjob=u'other', failures=u'0', studytime=u'2', health=u'5', G3=u'11', address=u'U', internet=u'no', Pstatus=u'T', higher=u'yes', G2=u'10', sex=u'F', _id=u'574e5fd1f8374e03e66d50a4', absences=u'2', Dalc=u'1', school=u'GP', goout=u'4', Fjob=u'teacher', famrel=u'3', age=u'18', paid=u'no', Walc=u'2', schoolsup=u'yes', freetime=u'4', G1=u'10', Fedu=u'4', reason=u'home', romantic=u'yes', activities=u'no', nursery=u'no', Medu=u'1', famsup=u'yes')]

In [8]:
# Persist `view` to the MongoDB collection `highschool.studentsview`.
# mode('overwrite') is requested, so existing contents of the target are replaced.
(
    view.write
    .format("com.stratio.datasource.mongodb")
    .mode('overwrite')
    .options(
        host='mongo:27017',
        database='highschool',
        collection='studentsview',
    )
    .save()
)

In [10]:
import pyspark_elastic

In [40]:
import json
def row_to_dict(row):
    """Split a row into an ``(_id, fields)`` pair.

    Args:
        row: Any object exposing ``asDict()`` (e.g. a ``pyspark.sql.Row``)
            whose mapping contains an ``'_id'`` entry.

    Returns:
        A 2-tuple ``(doc_id, fields)``. ``doc_id`` is the value stored under
        ``'_id'``; ``fields`` is a new dict with every other entry whose key
        and value are both truthy. Falsy values (``None``, ``''``, ``0``, ...)
        are therefore omitted from ``fields``.

    Raises:
        KeyError: If the row has no ``'_id'`` entry.
    """
    record = row.asDict()
    doc_id = record['_id']
    # .items() (not the Python-2-only .iteritems()) keeps this working on
    # both Python 2 and Python 3.
    fields = {
        key: value
        for key, value in record.items()
        if key != '_id' and value and key
    }
    return doc_id, fields
# Convert each (_id, reason) row of `view` into an (_id, fields) pair.
# row_to_dict is passed directly as the mapper so that it is actually invoked
# on every row; `.rdd` makes the RDD-level map explicit.
view_dicts = view.select("_id", "reason").rdd.map(row_to_dict)

In [84]:
# (_id, fields) pairs for every student row; this RDD is what gets written to
# Elasticsearch. Mapping over `df.rdd` is equivalent to `df.map` on Spark 1.x
# and keeps working on Spark versions where DataFrame.map is not available.
es_items = df.rdd.map(row_to_dict)

In [85]:
# Set "es.nodes" to "elasticsearch" both as a SparkContext local property and
# as a system property.
# NOTE(review): the save/read cells in this notebook also pass "es.nodes"
# explicitly in their `conf` dicts, so these calls may be redundant — confirm.
# NOTE(review): PySpark documents setSystemProperty as something to invoke
# before the SparkContext is instantiated; verify it has any effect on a live `sc`.
sc.setLocalProperty("es.nodes","elasticsearch")
sc.setSystemProperty("es.nodes","elasticsearch")

In [87]:
# Write the (id, fields) pairs in `es_items` through the elasticsearch-hadoop
# output format, targeting resource "students/items" on node "elasticsearch".
# NOTE(review): path='-' looks like a placeholder required by the API — confirm.
es_items.saveAsNewAPIHadoopFile(
    path='-',
    conf={"es.resource": "students/items", "es.nodes": "elasticsearch"},
    keyClass="org.apache.hadoop.io.NullWritable",
    valueClass="org.elasticsearch.hadoop.mr.LinkedMapWritable",
    outputFormatClass="org.elasticsearch.hadoop.mr.EsOutputFormat",
)

In [88]:
# Read resource "students/items" back from Elasticsearch as an RDD, using the
# elasticsearch-hadoop input format. Downstream code unpacks each element as
# an (id, document) pair.
es_rdd = sc.newAPIHadoopRDD(
    conf={"es.resource": "students/items", "es.nodes": "elasticsearch"},
    keyClass="org.apache.hadoop.io.NullWritable",
    valueClass="org.elasticsearch.hadoop.mr.LinkedMapWritable",
    inputFormatClass="org.elasticsearch.hadoop.mr.EsInputFormat",
)

In [89]:
from pyspark.sql import Row
def item_to_row(item):
    """Build a ``Row`` from an ``(id, document)`` pair.

    Args:
        item: A 2-element sequence ``(doc_id, data)`` where ``data`` is a
            mapping of field names to values.

    Returns:
        A ``Row`` containing every entry of ``data`` plus an ``_id`` field
        set to ``doc_id`` (overriding any ``_id`` already present in ``data``).
    """
    doc_id, data = item
    # Copy first so the caller's mapping is not mutated as a side effect.
    fields = dict(data)
    fields['_id'] = doc_id
    return Row(**fields)
# Turn every (id, document) pair read from Elasticsearch into a Row.
rows = es_rdd.map(item_to_row)

# Build a DataFrame from those rows (schema is inferred) and expose it to
# Spark SQL under the temporary table name `es_table`.
es_df = sqlContext.createDataFrame(rows)
es_df.registerTempTable('es_table')

In [90]:
# Fetch every students_table row where Fjob='teacher' and romantic='yes'.
# NOTE(review): this reads the MongoDB-backed `students_table`, not the
# `es_table` registered from Elasticsearch earlier — confirm that is intended.
sqlContext.sql(
    "select * from students_table where Fjob='teacher' and romantic='yes'"
).collect()


Out[90]:
[Row(famsize=u'GT3', traveltime=u'1', guardian=u'mother', Mjob=u'teacher', failures=u'0', studytime=u'2', health=u'5', G3=u'12', address=u'U', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'11', sex=u'M', _id=u'574e5fd1f8374e03e66d4f60', absences=u'4', Dalc=u'5', school=u'GP', goout=u'5', Fjob=u'teacher', famrel=u'4', age=u'16', paid=u'yes', Walc=u'5', schoolsup=u'no', freetime=u'4', G1=u'12', Fedu=u'4', reason=u'home', romantic=u'yes', activities=u'yes', nursery=u'yes', Medu=u'4', famsup=u'yes'),
 Row(famsize=u'GT3', traveltime=u'2', guardian=u'mother', Mjob=u'other', failures=u'0', studytime=u'3', health=u'5', G3=u'13', address=u'R', internet=u'yes', Pstatus=u'A', higher=u'yes', G2=u'13', sex=u'M', _id=u'574e5fd1f8374e03e66d4f68', absences=u'4', Dalc=u'1', school=u'GP', goout=u'3', Fjob=u'teacher', famrel=u'2', age=u'16', paid=u'no', Walc=u'1', schoolsup=u'no', freetime=u'4', G1=u'13', Fedu=u'4', reason=u'reputation', romantic=u'yes', activities=u'yes', nursery=u'yes', Medu=u'4', famsup=u'yes'),
 Row(famsize=u'GT3', traveltime=u'1', guardian=u'father', Mjob=u'services', failures=u'0', studytime=u'3', health=u'4', G3=u'16', address=u'U', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'16', sex=u'M', _id=u'574e5fd1f8374e03e66d4fa8', absences=u'0', Dalc=u'1', school=u'GP', goout=u'3', Fjob=u'teacher', famrel=u'4', age=u'16', paid=u'no', Walc=u'1', schoolsup=u'no', freetime=u'4', G1=u'16', Fedu=u'4', reason=u'other', romantic=u'yes', activities=u'yes', nursery=u'yes', Medu=u'4', famsup=u'yes'),
 Row(famsize=u'GT3', traveltime=u'2', guardian=u'father', Mjob=u'services', failures=u'0', studytime=u'3', health=u'5', G3=u'12', address=u'R', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'11', sex=u'F', _id=u'574e5fd1f8374e03e66d4fc5', absences=u'0', Dalc=u'2', school=u'GP', goout=u'2', Fjob=u'teacher', famrel=u'4', age=u'15', paid=u'no', Walc=u'2', schoolsup=u'no', freetime=u'2', G1=u'10', Fedu=u'4', reason=u'course', romantic=u'yes', activities=u'no', nursery=u'yes', Medu=u'3', famsup=u'yes'),
 Row(famsize=u'GT3', traveltime=u'4', guardian=u'mother', Mjob=u'at_home', failures=u'0', studytime=u'2', health=u'5', G3=u'11', address=u'R', internet=u'no', Pstatus=u'T', higher=u'yes', G2=u'11', sex=u'M', _id=u'574e5fd1f8374e03e66d4fcd', absences=u'2', Dalc=u'1', school=u'GP', goout=u'3', Fjob=u'teacher', famrel=u'5', age=u'15', paid=u'no', Walc=u'1', schoolsup=u'no', freetime=u'3', G1=u'12', Fedu=u'4', reason=u'course', romantic=u'yes', activities=u'no', nursery=u'yes', Medu=u'3', famsup=u'yes'),
 Row(famsize=u'GT3', traveltime=u'1', guardian=u'mother', Mjob=u'teacher', failures=u'0', studytime=u'1', health=u'5', G3=u'8', address=u'U', internet=u'yes', Pstatus=u'T', higher=u'no', G2=u'9', sex=u'M', _id=u'574e5fd1f8374e03e66d4fe4', absences=u'16', Dalc=u'2', school=u'GP', goout=u'2', Fjob=u'teacher', famrel=u'3', age=u'16', paid=u'no', Walc=u'1', schoolsup=u'no', freetime=u'3', G1=u'9', Fedu=u'4', reason=u'course', romantic=u'yes', activities=u'no', nursery=u'yes', Medu=u'4', famsup=u'yes'),
 Row(famsize=u'GT3', traveltime=u'1', guardian=u'mother', Mjob=u'other', failures=u'0', studytime=u'1', health=u'4', G3=u'13', address=u'U', internet=u'no', Pstatus=u'T', higher=u'yes', G2=u'13', sex=u'F', _id=u'574e5fd1f8374e03e66d4fea', absences=u'0', Dalc=u'1', school=u'GP', goout=u'1', Fjob=u'teacher', famrel=u'4', age=u'17', paid=u'no', Walc=u'1', schoolsup=u'yes', freetime=u'2', G1=u'13', Fedu=u'4', reason=u'course', romantic=u'yes', activities=u'no', nursery=u'yes', Medu=u'4', famsup=u'yes'),
 Row(famsize=u'GT3', traveltime=u'1', guardian=u'mother', Mjob=u'teacher', failures=u'0', studytime=u'2', health=u'2', G3=u'13', address=u'U', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'13', sex=u'M', _id=u'574e5fd1f8374e03e66d5039', absences=u'0', Dalc=u'1', school=u'GP', goout=u'5', Fjob=u'teacher', famrel=u'4', age=u'17', paid=u'no', Walc=u'3', schoolsup=u'yes', freetime=u'5', G1=u'13', Fedu=u'4', reason=u'reputation', romantic=u'yes', activities=u'yes', nursery=u'yes', Medu=u'4', famsup=u'yes'),
 Row(famsize=u'LE3', traveltime=u'1', guardian=u'mother', Mjob=u'teacher', failures=u'0', studytime=u'1', health=u'1', G3=u'17', address=u'U', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'18', sex=u'M', _id=u'574e5fd1f8374e03e66d509b', absences=u'0', Dalc=u'2', school=u'GP', goout=u'2', Fjob=u'teacher', famrel=u'1', age=u'18', paid=u'no', Walc=u'2', schoolsup=u'no', freetime=u'4', G1=u'18', Fedu=u'4', reason=u'home', romantic=u'yes', activities=u'no', nursery=u'yes', Medu=u'4', famsup=u'yes'),
 Row(famsize=u'GT3', traveltime=u'1', guardian=u'mother', Mjob=u'other', failures=u'0', studytime=u'2', health=u'5', G3=u'11', address=u'U', internet=u'no', Pstatus=u'T', higher=u'yes', G2=u'10', sex=u'F', _id=u'574e5fd1f8374e03e66d50a4', absences=u'2', Dalc=u'1', school=u'GP', goout=u'4', Fjob=u'teacher', famrel=u'3', age=u'18', paid=u'no', Walc=u'2', schoolsup=u'yes', freetime=u'4', G1=u'10', Fedu=u'4', reason=u'home', romantic=u'yes', activities=u'no', nursery=u'no', Medu=u'1', famsup=u'yes'),
 Row(famsize=u'GT3', traveltime=u'2', guardian=u'mother', Mjob=u'teacher', failures=u'0', studytime=u'3', health=u'4', G3=u'15', address=u'U', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'14', sex=u'F', _id=u'574e5fd1f8374e03e66d50c0', absences=u'4', Dalc=u'1', school=u'GP', goout=u'3', Fjob=u'teacher', famrel=u'4', age=u'17', paid=u'no', Walc=u'2', schoolsup=u'no', freetime=u'3', G1=u'15', Fedu=u'4', reason=u'course', romantic=u'yes', activities=u'no', nursery=u'no', Medu=u'4', famsup=u'yes'),
 Row(famsize=u'GT3', traveltime=u'2', guardian=u'mother', Mjob=u'teacher', failures=u'0', studytime=u'3', health=u'4', G3=u'17', address=u'R', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'16', sex=u'F', _id=u'574e5fd1f8374e03e66d5103', absences=u'6', Dalc=u'1', school=u'MS', goout=u'2', Fjob=u'teacher', famrel=u'4', age=u'16', paid=u'no', Walc=u'1', schoolsup=u'no', freetime=u'2', G1=u'16', Fedu=u'4', reason=u'course', romantic=u'yes', activities=u'yes', nursery=u'yes', Medu=u'4', famsup=u'no'),
 Row(famsize=u'GT3', traveltime=u'1', guardian=u'father', Mjob=u'other', failures=u'0', studytime=u'2', health=u'3', G3=u'13', address=u'U', internet=u'no', Pstatus=u'T', higher=u'yes', G2=u'12', sex=u'M', _id=u'574e5fd1f8374e03e66d5118', absences=u'0', Dalc=u'1', school=u'MS', goout=u'1', Fjob=u'teacher', famrel=u'4', age=u'16', paid=u'no', Walc=u'1', schoolsup=u'no', freetime=u'3', G1=u'13', Fedu=u'4', reason=u'course', romantic=u'yes', activities=u'yes', nursery=u'yes', Medu=u'4', famsup=u'yes'),
 Row(famsize=u'GT3', traveltime=u'1', guardian=u'father', Mjob=u'health', failures=u'0', studytime=u'2', health=u'2', G3=u'16', address=u'R', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'16', sex=u'F', _id=u'574e5fd1f8374e03e66d5147', absences=u'0', Dalc=u'2', school=u'MS', goout=u'3', Fjob=u'teacher', famrel=u'4', age=u'16', paid=u'no', Walc=u'3', schoolsup=u'no', freetime=u'3', G1=u'14', Fedu=u'4', reason=u'reputation', romantic=u'yes', activities=u'yes', nursery=u'no', Medu=u'4', famsup=u'no'),
 Row(famsize=u'GT3', traveltime=u'3', guardian=u'father', Mjob=u'other', failures=u'0', studytime=u'2', health=u'5', G3=u'0', address=u'R', internet=u'yes', Pstatus=u'T', higher=u'yes', G2=u'5', sex=u'F', _id=u'574e5fd1f8374e03e66d51b5', absences=u'0', Dalc=u'4', school=u'MS', goout=u'2', Fjob=u'teacher', famrel=u'3', age=u'18', paid=u'no', Walc=u'2', schoolsup=u'no', freetime=u'2', G1=u'7', Fedu=u'4', reason=u'other', romantic=u'yes', activities=u'no', nursery=u'no', Medu=u'4', famsup=u'yes')]

In [ ]: