In [1]:
# Count the lines containing each of two search terms in a text file.
# Adapted from: https://marcobonzanini.com/2015/07/09/getting-started-with-apache-spark-and-python-3/
from pyspark import SparkContext
fname = "gettingstarted.txt"
search1 = "peter"
search2 = "mimi"
sc = SparkContext("local", appName="Line Count")
data = sc.textFile(fname).cache()  # cache so the two counts below scan the file only once
# Transformations: filter() is lazy, so nothing is read or computed yet
filtered_data1 = data.filter(lambda s: search1 in s.lower())
filtered_data2 = data.filter(lambda s: search2 in s.lower())
# Actions: count() triggers the actual job and returns the number of matching lines
num1 = filtered_data1.count()
num2 = filtered_data2.count()
print('Lines with "%s": %i, lines with "%s": %i' % (search1, num1, search2, num2))
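In [2]:
# Sketch (not from the original post): both terms can be counted in a single
# pass by mapping each line to a pair of 0/1 flags and summing the pairs with
# one reduce() action, instead of running two separate filter/count jobs.
# Reuses data, search1, and search2 from the cell above.
counts = data.map(
    lambda s: (int(search1 in s.lower()), int(search2 in s.lower()))
).reduce(lambda a, b: (a[0] + b[0], a[1] + b[1]))
print('Lines with "%s": %i, lines with "%s": %i'
      % (search1, counts[0], search2, counts[1]))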
In [ ]: