In [2]:
# Word extraction
with open('terminal.txt', 'r') as file:
    book = file.read()

def tokenize():
    if book is not None:
        words = book.lower().split()
        return words
    else:
        return None

def map_book(tokens):
    hash_map = {}
    if tokens is not None:
        for element in tokens:
            # Remove punctuation
            word = element.replace(",", "")
            word = word.replace(".", "")
            # Increment the count if the word exists, otherwise start at 1
            if word in hash_map:
                hash_map[word] = hash_map[word] + 1
            else:
                hash_map[word] = 1
        return hash_map
    else:
        return None

# Tokenize the book
words = tokenize()
word_list = ['the', 'life', 'situations', 'since', 'day', 'hdfs', 'hadoop']

# Create a hash map (dictionary); named word_map to avoid shadowing the built-in map()
word_map = map_book(words)

# Show word frequencies
for word in word_list:
    print('Word: [' + word + '] Frequency: ' + str(word_map[word]))
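The same count can also be expressed as a Spark job. A minimal sketch, assuming the SparkContext sc used in the cells below is available; reduceByKey replaces the hand-rolled dictionary, and the lookup of 'hadoop' is only illustrative:

In [ ]:
# RDD-based word count over the same file (assumes sc exists)
counts = (sc.textFile('terminal.txt')
            .flatMap(lambda line: line.lower().split())
            .map(lambda w: (w.replace(",", "").replace(".", ""), 1))
            .reduceByKey(lambda a, b: a + b))
print(counts.lookup('hadoop'))  # list of counts recorded for 'hadoop'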
In [1]:
# An accumulator is a write-only shared variable that tasks can only add to
accum = sc.accumulator(0)
sc.parallelize([1, 2, 3, 4]).foreach(lambda x: accum.add(x))
print(accum.value)  # 10

# Accumulators can also be updated inside transformations; here f stands in
# for whatever per-element function the map would otherwise apply
def g(x):
    accum.add(x)
    return f(x)
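To make g runnable, f has to be bound to some per-element function; a sketch with a placeholder identity f (the name is illustrative, not from the original). Per the Spark docs, updates made inside transformations such as map are only guaranteed to be applied once per task when an action runs, so a re-executed task could add twice:

In [ ]:
f = lambda x: x  # placeholder transformation; any function of x works
accum2 = sc.accumulator(0)

def g(x):
    accum2.add(x)  # side effect: update the accumulator
    return f(x)    # normal return value of the transformation

sc.parallelize([1, 2, 3, 4]).map(g).collect()
print(accum2.value)  # 10 in a simple local run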
In [32]:
import threading
from time import sleep

result = "Not Set"
lock = threading.Lock()

def map_func(x):
    sleep(100)
    raise Exception("Task should have been cancelled")

def start_job(x):
    global result
    try:
        # Tag all jobs started by this thread with a cancellable group id
        sc.setJobGroup("job_to_cancel", "some description")
        result = sc.parallelize(range(x)).map(map_func).collect()
    except Exception as e:
        result = "Cancelled"
    lock.release()

def stop_job():
    sleep(5)
    sc.cancelJobGroup("job_to_cancel")

# Assigning to suppress just hides the return values in the notebook output
suppress = lock.acquire()
suppress = threading.Thread(target=start_job, args=(10,)).start()
suppress = threading.Thread(target=stop_job).start()
suppress = lock.acquire()  # blocks until start_job releases the lock
print(result)  # Cancelled
# seqOp folds one element y into a (sum, count) accumulator x within a partition;
# combOp merges two partial (sum, count) accumulators from different partitions
seqOp = (lambda x, y: (x[0] + y, x[1] + 1))
combOp = (lambda x, y: (x[0] + y[0], x[1] + y[1]))
print(seqOp)
print(combOp)
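These two functions are the per-partition fold step and the merge step of RDD.aggregate. A short sketch of how they combine into a (sum, count) pair, assuming the same sc:

In [ ]:
# aggregate folds each partition with seqOp from the zero value (0, 0),
# then merges the partial results with combOp
total, count = sc.parallelize([1, 2, 3, 4]).aggregate((0, 0), seqOp, combOp)
print(total, count)  # 10 4, so the mean is total / count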