In [ ]:
In [1]:
from pyspark import SparkConf, SparkContext
# Build the context through getOrCreate so this cell can be re-run safely:
# calling the SparkContext constructor a second time in the same kernel raises
# "ValueError: Cannot run multiple SparkContexts at once".
# On a fresh kernel this creates the same context as before (master "local",
# application name "First App"); if a context is already active it is returned
# as-is and the configuration below is ignored.
sc = SparkContext.getOrCreate(
    SparkConf().setMaster("local").setAppName("First App")
)
In [2]:
# Echo the master URL of the active SparkContext as the cell's output
# (expected to be "local", the master passed when `sc` was created).
sc.master
Out[2]:
In [3]:
# Read the Spark README from the local filesystem as an RDD of text lines.
# The RDD is cached because it is scanned twice below (once per letter).
logFile = "file:///opt/spark/README.md"
logData = sc.textFile(logFile).cache()


def _contains(letter):
    """Return a predicate that is true for any string containing `letter`."""
    return lambda line: letter in line


# Count the lines containing "a" first, then those containing "b".
numAs, numBs = (
    logData.filter(_contains(letter)).count() for letter in ("a", "b")
)
print("Lines with a:{} , lines with b: {}".format(numAs, numBs))
In [ ]: