In [1]:
from __future__ import print_function


%pylab inline
from pyspark.sql.types import *


Populating the interactive namespace from numpy and matplotlib

In [2]:
# Load the lyrics text file through `sc.textFile`. Despite its name, `fname`
# holds the object returned by `textFile`, not a file name; the next cell
# treats its elements as lines of text.
# NOTE(review): `sc` is not defined anywhere in this notebook - presumably the
# SparkContext injected by the PySpark shell/kernel; confirm before running
# under a plain Python kernel.
# NOTE(review): the path is absolute and specific to one user's machine, so
# this cell will not run elsewhere without editing it.
fname = sc.textFile('/Users/omojumiller/mycode/JayZ/JayZ_American Gangster_American Dreamin.txt')

In [4]:
# Word count over the lines held in `fname`:
#   1. flatMap     - break every line into its whitespace-separated words
#   2. map         - pair each word with an initial count of 1
#   3. reduceByKey - add up the 1s for each distinct word
# str.split() with no argument is used instead of split(" "): it splits on any
# run of whitespace (spaces, tabs) and never yields '' tokens, so blank lines
# and repeated/leading/trailing spaces no longer show up as a bogus
# empty-string "word" in the result.
counts = (
    fname.flatMap(lambda line: line.split())
         .map(lambda word: (word, 1))
         .reduceByKey(lambda a, b: a + b)
)

In [5]:
# Write the (word, count) pairs in `counts` out as text under the relative
# path 'word_count_out' (resolved against wherever the Spark job runs).
# NOTE(review): Spark normally refuses to write to an output path that already
# exists, so re-running this cell may fail until 'word_count_out' is removed -
# confirm against the Spark/Hadoop configuration in use.
counts.saveAsTextFile('word_count_out')

In [ ]: