In [3]:
from IPython.display import HTML
# Render the embedded YouTube player as this cell's output.
HTML(
    '<iframe width="846" height="476" '
    'src="https://www.youtube.com/embed/KdSqUjFWzdY" '
    'frameborder="0" allowfullscreen></iframe>'
)
Out[3]:
In [5]:
from IPython.display import HTML
# Render the embedded YouTube player as this cell's output.
HTML(
    '<iframe width="960" height="540" '
    'src="https://www.youtube.com/embed/gYiwszKaCoQ" '
    'frameborder="0" allowfullscreen></iframe>'
)
Out[5]:
In [6]:
from IPython.display import HTML
# Render the embedded YouTube player as this cell's output.
HTML(
    '<iframe width="798" height="449" '
    'src="https://www.youtube.com/embed/gI4HN0JhPmo" '
    'frameborder="0" allowfullscreen></iframe>'
)
Out[6]:
import logging
import sys
import string
from util import logfile
# Route log records to the file named by util.logfile: the file is truncated
# on every run (filemode 'w'), records at INFO and above are kept, and only
# the bare message text is written.
# NOTE(review): logging.basicConfig does nothing if the root logger already
# has handlers, so a second call in the same kernel will not change the target.
logging.basicConfig(
    filename=logfile,
    filemode='w',
    level=logging.INFO,
    format='%(message)s',
)
def word_count(stream=None):
    """Serially count word occurrences in a text and print the result.

    The text (the exercise uses Alice in Wonderland) is consumed line by
    line.  For every line the function:

    1. tokenizes the line on whitespace
       (``"Hello, World!"`` -> ``"Hello,"`` and ``"World!"``);
    2. removes every ``string.punctuation`` character from each token
       (``"Hello,"`` -> ``"Hello"``);
    3. lowercases the token (``"Hello"`` -> ``"hello"``).

    Tokens that are empty once punctuation is removed (a bare ``"--"``, or
    the nothing produced by a blank line) are skipped instead of being
    counted under the empty-string key.

    The number of times each word appears is stored in a dictionary which
    is *printed*, not returned: print output is treated as the program's
    final result, so a stray debug ``print`` would corrupt it.  Use the
    configured logger for debugging instead, e.g.
    ``logging.info("My debugging message")``; see
    https://docs.python.org/2/library/logging.html for more information.

    Args:
        stream: Iterable of text lines to count.  Defaults to ``sys.stdin``
            (looked up at call time so a redirected stdin is honoured).

    Returns:
        None.  The word -> count dictionary is written to stdout.
    """
    if stream is None:
        stream = sys.stdin

    # Built once, outside the loops.  Filtering characters against a set
    # behaves the same on Python 2 and 3, unlike the two-argument
    # str.translate / string.maketrans pairing, which is Python 2 only.
    punctuation = frozenset(string.punctuation)

    # word -> number of occurrences
    word_counts = {}

    for line in stream:
        # split() without an argument splits on any run of whitespace and
        # never yields empty tokens, so blank lines contribute nothing.
        for token in line.split():
            key = "".join(ch for ch in token if ch not in punctuation).lower()
            if not key:
                # The token was pure punctuation; it is not a word.
                continue
            # dict.get is a constant-time lookup; no scan over the keys.
            word_counts[key] = word_counts.get(key, 0) + 1

    # Single-argument print(...) is valid on both Python 2 and Python 3.
    print(word_counts)
# Run the word counter only when this code is executed directly (as a script
# or in a notebook cell, where __name__ is "__main__"), so that importing it
# as a module does not consume stdin as a side effect.
if __name__ == "__main__":
    word_count()
In [7]:
from IPython.display import HTML
# Render the embedded YouTube player as this cell's output.
HTML(
    '<iframe width="798" height="449" '
    'src="https://www.youtube.com/embed/onseMon9zqA" '
    'frameborder="0" allowfullscreen></iframe>'
)
Out[7]:
In [8]:
from IPython.display import HTML
# Render the embedded YouTube player as this cell's output.
HTML(
    '<iframe width="798" height="449" '
    'src="https://www.youtube.com/embed/_q6098sNqpo" '
    'frameborder="0" allowfullscreen></iframe>'
)
Out[8]:
In [9]:
from IPython.display import HTML
# Render the embedded YouTube player as this cell's output.
HTML(
    '<iframe width="798" height="449" '
    'src="https://www.youtube.com/embed/mPYxFC7DI28" '
    'frameborder="0" allowfullscreen></iframe>'
)
Out[9]:
In [2]:
from IPython.display import HTML
# Render the embedded YouTube player as this cell's output.
HTML(
    '<iframe width="798" height="449" '
    'src="https://www.youtube.com/embed/bkhuEG0D2HM" '
    'frameborder="0" allowfullscreen></iframe>'
)
Out[2]:
import sys
import string
import logging
from util import mapper_logfile
# Route log records to the file named by util.mapper_logfile: the file is
# truncated on every run (filemode 'w'), records at INFO and above are kept,
# and only the bare message text is written.
# NOTE(review): logging.basicConfig does nothing if the root logger already
# has handlers, so if logging was configured earlier in the same kernel this
# call will not redirect output to mapper_logfile.
logging.basicConfig(
    filename=mapper_logfile,
    filemode='w',
    level=logging.INFO,
    format='%(message)s',
)
def mapper(stream=None):
    """Emit one ``district<TAB>aadhaar_generated`` pair per valid CSV row.

    Each input line is a comma-separated list of values and the header row
    IS included in the input.  Every line is tokenized on commas; rows that
    do not have exactly 12 tokens, and the header row (first token
    ``'Registrar'``), are skipped.  For every remaining row the district
    (token 3, not the state) and the Aadhaar-generated value (token 8) are
    printed, separated by a tab.

    A copy of the input Aadhaar data is available at
    https://www.dropbox.com/s/vn8t4uulbsfmalo/aadhaar_data.csv

    Because the printed lines are the program's output, debugging messages
    must not be printed; use the configured logger instead, e.g.
    ``logging.info("My debugging message")``.

    Args:
        stream: Iterable of CSV text lines.  Defaults to ``sys.stdin``
            (looked up at call time so a redirected stdin is honoured).

    Returns:
        None.  Key-value pairs are written to stdout, one per line.
    """
    # Layout of a well-formed row.
    expected_tokens = 12
    district_index = 3
    generated_index = 8
    header_first_token = 'Registrar'

    if stream is None:
        stream = sys.stdin

    for line in stream:
        # Tokenize the line of data.
        data = line.strip().split(",")

        # Skip malformed rows and the header row.
        if len(data) != expected_tokens or data[0] == header_first_token:
            continue

        # Single-argument print(...) is valid on both Python 2 and Python 3.
        print("{0}\t{1}".format(data[district_index], data[generated_index]))
# Run the mapper only when this code is executed directly (as a script or in
# a notebook cell, where __name__ is "__main__"), so that importing it as a
# module does not consume stdin as a side effect.
if __name__ == "__main__":
    mapper()
import sys
import logging
from util import reducer_logfile
# Route log records to the file named by util.reducer_logfile: the file is
# truncated on every run (filemode 'w'), records at INFO and above are kept,
# and only the bare message text is written.
# NOTE(review): logging.basicConfig does nothing if the root logger already
# has handlers, so if logging was configured earlier in the same kernel this
# call will not redirect output to reducer_logfile.
logging.basicConfig(
    filename=reducer_logfile,
    filemode='w',
    level=logging.INFO,
    format='%(message)s',
)
def reducer(stream=None):
    """Sum the Aadhaar-generated values per key and print one total per key.

    Each input line is a key-value pair separated by a tab character; lines
    that do not split into exactly two tab-separated fields are skipped.
    Every key is printed once together with the total of its values, again
    separated by a tab.  Totals are accumulated as floats, so a final pair
    looks like ``Gujarat<TAB>5.0``.

    A running total is flushed whenever the key differs from the previous
    line's key, so all lines for one key must be adjacent in the input;
    a key whose lines are interleaved with other keys is printed once per
    run of adjacent lines.

    Because the printed lines are the program's output, debugging messages
    must not be printed; use the configured logger instead, e.g.
    ``logging.info("My debugging message")``.

    Args:
        stream: Iterable of ``key<TAB>value`` text lines.  Defaults to
            ``sys.stdin`` (looked up at call time so a redirected stdin is
            honoured).

    Returns:
        None.  Totals are written to stdout, one ``key<TAB>total`` per line.

    Raises:
        ValueError: If a value field cannot be converted with ``float``.
    """
    if stream is None:
        stream = sys.stdin

    # Running total for the key currently being accumulated.
    aadhaar_generated = 0.0
    # Key of the previous valid line; None until the first valid line is seen.
    old_key = None

    for line in stream:
        data = line.strip().split("\t")
        if len(data) != 2:
            # Not a well-formed key/value pair.
            continue

        this_key, count = data

        # The key changed: emit the finished total and start a new one.
        if old_key is not None and old_key != this_key:
            print("{0}\t{1}".format(old_key, aadhaar_generated))
            aadhaar_generated = 0.0

        old_key = this_key
        aadhaar_generated += float(count)

    # Emit the total for the final key, if any valid line was read.
    if old_key is not None:
        print("{0}\t{1}".format(old_key, aadhaar_generated))
# Run the reducer only when this code is executed directly (as a script or in
# a notebook cell, where __name__ is "__main__"), so that importing it as a
# module does not consume stdin as a side effect.
if __name__ == "__main__":
    reducer()
MapReduce programming model
In [1]:
# Recap
from IPython.display import HTML
# Render the embedded YouTube player as this cell's output.
HTML(
    '<iframe width="798" height="449" '
    'src="https://www.youtube.com/embed/Pl68U2iGtyI" '
    'frameborder="0" allowfullscreen></iframe>'
)
Out[1]: