In [1]:
from pyspark import SparkContext

# Start a SparkContext in local mode with the application name "First App"
sc = SparkContext("local", "First App")
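
Only one SparkContext can be active per process, so re-running the cell above raises an error. A minimal alternative sketch, assuming the same local setup, that describes the application in an explicit SparkConf and reuses a running context via getOrCreate:

In [ ]:
from pyspark import SparkConf, SparkContext

# Describe the application in a SparkConf instead of positional arguments
conf = SparkConf().setMaster("local").setAppName("First App")

# getOrCreate returns the already-running context if one exists,
# avoiding the "Cannot run multiple SparkContexts at once" error
sc = SparkContext.getOrCreate(conf)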

In [2]:
sc.master


Out[2]:
'local'
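
The master URL is only one of several read-only attributes exposed by the context. A quick sketch of a few others from the standard SparkContext API; the exact values depend on the local installation:

In [ ]:
print(sc.appName)             # the name passed when the context was created
print(sc.version)             # the Spark version, e.g. '3.x.x'
print(sc.defaultParallelism)  # default number of partitions for RDD operations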

In [3]:
logFile = "file:///opt/spark/README.md"
# Read the file as an RDD of lines and cache it, since it is scanned twice below
logData = sc.textFile(logFile).cache()
numAs = logData.filter(lambda s: 'a' in s).count()
numBs = logData.filter(lambda s: 'b' in s).count()
print("Lines with a: {}, lines with b: {}".format(numAs, numBs))


Lines with a: 61, lines with b: 30
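
Since logData is already cached, it can be reused for further transformations. Below is a minimal word-count sketch over the same RDD; splitting on whitespace and sampling five pairs with take(5) are illustrative choices, not part of the original example:

In [ ]:
# Split each line into words, pair each word with 1, then sum the counts per word
counts = (logData.flatMap(lambda line: line.split())
                 .map(lambda word: (word, 1))
                 .reduceByKey(lambda a, b: a + b))

# Show a small sample of (word, count) pairs
print(counts.take(5))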

In [ ]: