In [1]:
from __future__ import print_function
%pylab inline
from pyspark.sql.types import *
In [2]:
fname = sc.textFile('/Users/omojumiller/mycode/JayZ/JayZ_American Gangster_American Dreamin.txt')
In [4]:
# Classic word count: tokenize each line, emit (word, 1) pairs, sum per word.
# Fix: bare split() splits on any run of whitespace (spaces, tabs) and drops
# empty tokens; split(" ") emitted '' tokens for consecutive spaces, which
# were then counted as a "word".
counts = (
    fname.flatMap(lambda line: line.split())
         .map(lambda word: (word, 1))
         .reduceByKey(lambda a, b: a + b)
)
In [5]:
counts.saveAsTextFile('word_count_out')
In [ ]: