In [1]:
# Initialize Spark in Python: import the SparkContext entry point
from pyspark import SparkContext

In [2]:
# Create the SparkContext (local master, application name)
sc = SparkContext("local", "Hello World APP")

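If you need more control over the configuration, the SparkContext can also be built from a SparkConf object. The following is a minimal sketch of an equivalent setup (do not run it alongside the cell above, since only one SparkContext may be active at a time):

from pyspark import SparkConf, SparkContext

# Equivalent setup via SparkConf: master URL and application name
conf = SparkConf().setMaster("local").setAppName("Hello World APP")
sc = SparkContext(conf=conf)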
In [3]:
# Should be some text file on your system; adjust the path to your installation
logFile = "/usr/apache/spark-2.0.2-bin-hadoop2.7/README.md"
logData = sc.textFile(logFile).cache()

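Note that cache() is lazy: it only marks the RDD for in-memory persistence, and the data is actually materialized by the first action that touches it (the first count() below). You can inspect the caching state like this:

# cache() marks the RDD; nothing is computed or stored yet
logData.is_cached           # True once cache() has been called
logData.getStorageLevel()   # storage level used when the RDD is persisted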
In [4]:
# Count the lines containing the letter 'a' and the letter 'b';
# count() is an action, so it triggers evaluation (and populates the cache)
numAs = logData.filter(lambda s: 'a' in s).count()
numBs = logData.filter(lambda s: 'b' in s).count()

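Each count() above is a separate action, so the file is scanned twice (against the cached data). As a sketch, the same two counts can also be computed in a single pass with map and reduce:

# Single pass: emit a (has_a, has_b) indicator pair per line, then sum pairwise
numAs, numBs = logData \
    .map(lambda s: (1 if 'a' in s else 0, 1 if 'b' in s else 0)) \
    .reduce(lambda x, y: (x[0] + y[0], x[1] + y[1]))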
In [5]:
print("Lines with a: %i, lines with b: %i" % (numAs, numBs))


Lines with a: 61, lines with b: 27

In [6]:
# Shut down the SparkContext and release its resources
sc.stop()

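In a standalone script (as opposed to an interactive notebook), a common pattern is to guard the stop() call with try/finally so the context is released even if the job fails. A minimal sketch:

from pyspark import SparkContext

sc = SparkContext("local", "Hello World APP")
try:
    # ... transformations and actions go here ...
    pass
finally:
    sc.stop()  # always release the context, even on error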