In [2]:
# Word extraction
# Read the whole text up front. The context manager closes the file handle as
# soon as the read finishes instead of leaving it open for the kernel session.
with open('terminal.txt', 'r') as file:
    book = file.read()


def tokenize():
    """Split the module-level ``book`` text into lower-cased tokens.

    The text is lower-cased and split on runs of whitespace; punctuation is
    left attached to the tokens.

    Returns:
        The list of tokens, or None when ``book`` is None.
    """
    if book is None:
        return None

    lowered = book.lower()
    return lowered.split()
        

def map_book(tokens):
    """Count how often each word occurs in ``tokens``.

    Commas and periods are removed from every token before counting. A token
    that consists only of that punctuation (for example a stray ".") is empty
    after cleaning and is skipped, so the result never contains an
    empty-string key.

    Args:
        tokens: Iterable of string tokens, or None.

    Returns:
        A dict mapping each cleaned word to its number of occurrences, or
        None when ``tokens`` is None.
    """
    if tokens is None:
        return None

    hash_map = {}
    for element in tokens:
        # Remove punctuation
        word = element.replace(",", "").replace(".", "")

        # Nothing is left once the punctuation is gone: not a word.
        if not word:
            continue

        # First sighting starts at 0, then every sighting adds one.
        hash_map[word] = hash_map.get(word, 0) + 1

    return hash_map


# Tokenize the book
words = tokenize()
word_list = ['the','life','situations','since','day','hdfs','hadoop']

# Create a hash map (dictionary) of word -> frequency.
# NOTE: the name `map` shadows the built-in; it is kept so that anything else
# reading this variable keeps working.
map = map_book(words)

# Show word information. A word that never occurs in the text is reported with
# frequency 0 instead of aborting the loop with a KeyError.
for word in word_list:
    print ('Word: [' + word + '] Frequency: ' + str(map.get(word, 0)))


Word: [the] Frequency: 9
Word: [life] Frequency: 1
Word: [situations] Frequency: 1
Word: [since] Frequency: 1
Word: [day] Frequency: 4
Word: [hdfs] Frequency: 11
Word: [hadoop] Frequency: 8

In [1]:
# Accumulator created with an initial value of 0; each element of the
# parallelized list is added to it, then the total is printed.
accum = sc.accumulator(0)
numbers = sc.parallelize([1, 2, 3, 4])
numbers.foreach(lambda value: accum.add(value))
print(accum.value)
def g(x):
    """Add ``x`` to the ``accum`` accumulator, then return ``f(x)``.

    NOTE(review): ``f`` is not defined in the visible cells; it has to exist
    in the kernel namespace before ``g`` is called -- confirm.
    """
    accum.add(x)
    outcome = f(x)
    return outcome


10

In [33]:



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-33-e6415a8ef9d9> in <module>()
----> 1 class VectorAccumulatorParam(AccumulatorParam):
      2     def zero(self, initialValue):
      3         return Vector.zeros(initialValue.size)
      4 
      5     def addInPlace(self, v1, v2):

NameError: name 'AccumulatorParam' is not defined

In [32]:
import threading
from time import sleep
# Outcome of the demo job. start_job overwrites it with either the collected
# list or the string "Cancelled".
result = "Not Set"
# Handshake lock between the main thread and the worker thread: start_job
# releases it when its job attempt has finished.
lock = threading.Lock()
def map_func(x):
    """Task body for the cancellation demo: sleep 100 seconds, then raise.

    The argument is ignored. The exception is reached only if the sleep runs
    to completion.

    Raises:
        Exception: always, once the sleep has finished.
    """
    sleep(100)
    failure = Exception("Task should have been cancelled")
    raise failure
def start_job(x):
    """Run a Spark job in the "job_to_cancel" group and record its outcome.

    The job maps ``map_func`` over ``range(x)`` and collects the result into
    the module-level ``result``. If the job raises an ``Exception``,
    ``result`` is set to the string "Cancelled" instead.

    The module-level ``lock`` is released when the attempt is over, whatever
    the outcome, so a thread blocked on ``lock.acquire()`` can proceed.

    Args:
        x: Upper bound passed to ``range`` to build the input data.
    """
    global result
    try:
        sc.setJobGroup("job_to_cancel", "some description")
        result = sc.parallelize(range(x)).map(map_func).collect()
    except Exception:
        result = "Cancelled"
    finally:
        # Release unconditionally: if anything that is not an ``Exception``
        # subclass escapes (e.g. KeyboardInterrupt), the waiting thread would
        # otherwise stay blocked on the lock forever.
        lock.release()
def stop_job():
    """Wait 5 seconds, then cancel every job in the "job_to_cancel" group."""
    delay_seconds = 5
    sleep(delay_seconds)
    sc.cancelJobGroup("job_to_cancel")
# Take the lock before starting the threads so that the second acquire below
# blocks until start_job releases it.
# Return values are bound to `supress`, presumably only to keep them out of
# the displayed output -- the variable is never read.
supress = lock.acquire()
# Worker thread: runs the Spark job over range(10) via start_job.
supress = threading.Thread(target=start_job, args=(10,)).start()
# Canceller thread: stop_job sleeps, then cancels the job group.
supress = threading.Thread(target=stop_job).start()
# Blocks here until start_job has released the lock, i.e. its attempt is over.
supress = lock.acquire()
print(result)
# seqOp folds one element y into a pair x: adds y to x[0] and 1 to x[1].
seqOp = (lambda x, y: (x[0] + y, x[1] + 1))
# combOp merges two such pairs by adding them position by position.
combOp = (lambda x, y: (x[0] + y[0], x[1] + y[1]))
# Printing the lambdas shows their function repr; neither is called here.
print(seqOp)
print(combOp)


Cancelled
<function <lambda> at 0x1080d8840>
<function <lambda> at 0x1080d8ae8>

In [ ]: