In [1]:
# Initializing Spark in Python
from pyspark import SparkContext
sc = SparkContext("local", "Load and save data")
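A quick check that the context came up; "local" runs Spark in-process with a single worker thread, while "local[*]" would use all available cores.

In [ ]:
# Confirm the master URL and application name of the running context
print(sc.master, sc.appName)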
In [14]:
# Input: load a plain text file as an RDD of lines
# (renamed from `input` to avoid shadowing Python's built-in input())
lines = sc.textFile("log.txt")
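textFile is lazy, so nothing is actually read until an action runs; a quick way to confirm the load (this assumes log.txt exists in the working directory):

In [ ]:
# Force the read and peek at the first couple of lines
lines.take(2)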
In [15]:
# Data: build an RDD from an in-memory Python collection
result = sc.parallelize([1, 2, 3, 4])
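parallelize is convenient for small test datasets; collect() brings the whole RDD back to the driver, so it is only safe here because the data is tiny:

In [ ]:
# Materialize the RDD on the driver
result.collect()  # [1, 2, 3, 4]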
In [18]:
# Output: despite the name, saveAsTextFile creates a directory of part files
result.saveAsTextFile("output.txt")
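The saved directory can be read back with textFile, which picks up every part file inside it; note that saveAsTextFile fails if the path already exists:

In [ ]:
# Read the saved output back in; each part file contributes its lines
sc.textFile("output.txt").collect()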
In [46]:
import json
# Input: read a JSON-lines file, one JSON object per line
json_lines = sc.textFile("log.json")
data = json_lines.map(lambda x: json.loads(x))
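json.loads is applied line by line, so a single malformed line would fail the whole job. A more defensive sketch (the parse_json_safely helper and the parsed name are illustrative, not part of the original):

In [ ]:
# Skip unparseable lines instead of raising; flatMap drops the empty lists
def parse_json_safely(line):
    try:
        return [json.loads(line)]
    except ValueError:
        return []

parsed = json_lines.flatMap(parse_json_safely)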
In [47]:
# Output: keep records whose 'code' field is truthy, then re-serialize to JSON
result = data.filter(lambda x: x['code']).map(lambda x: json.dumps(x))
result.saveAsTextFile("output.json")
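A common refinement from here: mapPartitions lets any per-partition setup happen once instead of per record, for example constructing a json.JSONEncoder a single time (a sketch; the encode_partition helper and the output_encoded.json path are hypothetical):

In [ ]:
# Build one encoder per partition and reuse it for every record
def encode_partition(records):
    encoder = json.JSONEncoder()
    return (encoder.encode(r) for r in records)

data.filter(lambda x: x['code']) \
    .mapPartitions(encode_partition) \
    .saveAsTextFile("output_encoded.json")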