notebook.community

Edit and run



In [1]:

    
from __future__ import print_function


%pylab inline
from pyspark.sql.types import *









    



Populating the interactive namespace from numpy and matplotlib



In [2]:

    
# Location of the lyrics text file to analyse.
lyrics_path = '/Users/omojumiller/mycode/JayZ/JayZ_American Gangster_American Dreamin.txt'

# Despite its name, `fname` is not a filename: it holds the RDD returned by
# textFile(), with one element per line of the file.
# `sc` (the SparkContext) is not created in the visible cells -- it is assumed
# to be supplied by the PySpark kernel/shell. TODO confirm.
fname = sc.textFile(lyrics_path)



In [4]:

    
# Word count over the lyrics RDD:
#   1. flatMap     -- break every line into its individual tokens,
#   2. map         -- pair each token with an initial count of 1,
#   3. reduceByKey -- add up the 1s for identical tokens.
#
# str.split() with no argument splits on any run of whitespace and never
# yields empty strings. The previous split(" ") produced a '' token for every
# blank line and for each extra/leading/trailing space, so '' was counted as
# a "word"; it also failed to split on tabs.
counts = (
    fname.flatMap(lambda line: line.split())
         .map(lambda word: (word, 1))
         .reduceByKey(lambda a, b: a + b)
)



In [5]:

    
# Directory that receives the (word, count) pairs as text part-files.
# NOTE: by default Spark refuses to write into an output directory that
# already exists, so remove it before re-running this cell.
output_dir = 'word_count_out'
counts.saveAsTextFile(output_dir)



In [ ]: