In [ ]:
# %load data/numbers.txt
1 10
2 11
3 3
4 12
5 4
6 1
7 1
8 41
9 532
10 2
11 0
In [ ]:
# %load code/MRSortByString.py
from mrjob.job import MRJob
class MRSortByString(MRJob):
def mapper(self, _, line):
"""
"""
l = line.split(' ')
print l
yield l[1], l[0]
def reducer(self, key, val):
yield key, [v for v in val][0]
if __name__ == '__main__':
MRSortByString.run()
In [31]:
%run code/MRSortByString.py data/numbers.txt
no configs found; falling back on auto-configuration
no configs found; falling back on auto-configuration
no configs found; falling back on auto-configuration
no configs found; falling back on auto-configuration
no configs found; falling back on auto-configuration
no configs found; falling back on auto-configuration
no configs found; falling back on auto-configuration
no configs found; falling back on auto-configuration
no configs found; falling back on auto-configuration
no configs found; falling back on auto-configuration
no configs found; falling back on auto-configuration
no configs found; falling back on auto-configuration
no configs found; falling back on auto-configuration
no configs found; falling back on auto-configuration
INFO:mrjob.conf:no configs found; falling back on auto-configuration
no configs found; falling back on auto-configuration
no configs found; falling back on auto-configuration
no configs found; falling back on auto-configuration
no configs found; falling back on auto-configuration
no configs found; falling back on auto-configuration
no configs found; falling back on auto-configuration
no configs found; falling back on auto-configuration
no configs found; falling back on auto-configuration
no configs found; falling back on auto-configuration
no configs found; falling back on auto-configuration
no configs found; falling back on auto-configuration
no configs found; falling back on auto-configuration
no configs found; falling back on auto-configuration
no configs found; falling back on auto-configuration
INFO:mrjob.conf:no configs found; falling back on auto-configuration
creating tmp directory c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000
creating tmp directory c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000
creating tmp directory c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000
creating tmp directory c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000
creating tmp directory c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000
creating tmp directory c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000
creating tmp directory c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000
creating tmp directory c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000
creating tmp directory c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000
creating tmp directory c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000
creating tmp directory c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000
creating tmp directory c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000
creating tmp directory c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000
creating tmp directory c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000
INFO:mrjob.runner:creating tmp directory c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000
WARNING:mrjob.runner:
PLEASE NOTE: Starting in mrjob v0.5.0, protocols will be strict by default. It's recommended you run your job with --strict-protocols or set up mrjob.conf as described at https://pythonhosted.org/mrjob/whats-new.html#ready-for-strict-protocols
PLEASE NOTE: Starting in mrjob v0.5.0, protocols will be strict by default. It's recommended you run your job with --strict-protocols or set up mrjob.conf as described at https://pythonhosted.org/mrjob/whats-new.html#ready-for-strict-protocols
PLEASE NOTE: Starting in mrjob v0.5.0, protocols will be strict by default. It's recommended you run your job with --strict-protocols or set up mrjob.conf as described at https://pythonhosted.org/mrjob/whats-new.html#ready-for-strict-protocols
PLEASE NOTE: Starting in mrjob v0.5.0, protocols will be strict by default. It's recommended you run your job with --strict-protocols or set up mrjob.conf as described at https://pythonhosted.org/mrjob/whats-new.html#ready-for-strict-protocols
PLEASE NOTE: Starting in mrjob v0.5.0, protocols will be strict by default. It's recommended you run your job with --strict-protocols or set up mrjob.conf as described at https://pythonhosted.org/mrjob/whats-new.html#ready-for-strict-protocols
PLEASE NOTE: Starting in mrjob v0.5.0, protocols will be strict by default. It's recommended you run your job with --strict-protocols or set up mrjob.conf as described at https://pythonhosted.org/mrjob/whats-new.html#ready-for-strict-protocols
PLEASE NOTE: Starting in mrjob v0.5.0, protocols will be strict by default. It's recommended you run your job with --strict-protocols or set up mrjob.conf as described at https://pythonhosted.org/mrjob/whats-new.html#ready-for-strict-protocols
PLEASE NOTE: Starting in mrjob v0.5.0, protocols will be strict by default. It's recommended you run your job with --strict-protocols or set up mrjob.conf as described at https://pythonhosted.org/mrjob/whats-new.html#ready-for-strict-protocols
PLEASE NOTE: Starting in mrjob v0.5.0, protocols will be strict by default. It's recommended you run your job with --strict-protocols or set up mrjob.conf as described at https://pythonhosted.org/mrjob/whats-new.html#ready-for-strict-protocols
PLEASE NOTE: Starting in mrjob v0.5.0, protocols will be strict by default. It's recommended you run your job with --strict-protocols or set up mrjob.conf as described at https://pythonhosted.org/mrjob/whats-new.html#ready-for-strict-protocols
PLEASE NOTE: Starting in mrjob v0.5.0, protocols will be strict by default. It's recommended you run your job with --strict-protocols or set up mrjob.conf as described at https://pythonhosted.org/mrjob/whats-new.html#ready-for-strict-protocols
PLEASE NOTE: Starting in mrjob v0.5.0, protocols will be strict by default. It's recommended you run your job with --strict-protocols or set up mrjob.conf as described at https://pythonhosted.org/mrjob/whats-new.html#ready-for-strict-protocols
PLEASE NOTE: Starting in mrjob v0.5.0, protocols will be strict by default. It's recommended you run your job with --strict-protocols or set up mrjob.conf as described at https://pythonhosted.org/mrjob/whats-new.html#ready-for-strict-protocols
PLEASE NOTE: Starting in mrjob v0.5.0, protocols will be strict by default. It's recommended you run your job with --strict-protocols or set up mrjob.conf as described at https://pythonhosted.org/mrjob/whats-new.html#ready-for-strict-protocols
WARNING:mrjob.runner:PLEASE NOTE: Starting in mrjob v0.5.0, protocols will be strict by default. It's recommended you run your job with --strict-protocols or set up mrjob.conf as described at https://pythonhosted.org/mrjob/whats-new.html#ready-for-strict-protocols
WARNING:mrjob.runner:
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper_part-00000
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper_part-00000
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper_part-00000
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper_part-00000
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper_part-00000
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper_part-00000
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper_part-00000
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper_part-00000
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper_part-00000
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper_part-00000
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper_part-00000
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper_part-00000
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper_part-00000
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper_part-00000
INFO:mrjob.sim:writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper_part-00000
Counters from step 1:
Counters from step 1:
Counters from step 1:
Counters from step 1:
Counters from step 1:
Counters from step 1:
Counters from step 1:
Counters from step 1:
Counters from step 1:
Counters from step 1:
Counters from step 1:
Counters from step 1:
Counters from step 1:
Counters from step 1:
INFO:mrjob.runner:Counters from step 1:
(no counters found)
(no counters found)
(no counters found)
(no counters found)
(no counters found)
(no counters found)
(no counters found)
(no counters found)
(no counters found)
(no counters found)
(no counters found)
(no counters found)
(no counters found)
(no counters found)
INFO:mrjob.runner: (no counters found)
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper-sorted
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper-sorted
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper-sorted
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper-sorted
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper-sorted
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper-sorted
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper-sorted
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper-sorted
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper-sorted
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper-sorted
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper-sorted
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper-sorted
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper-sorted
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper-sorted
INFO:mrjob.runner:writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper-sorted
> sort 'c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper_part-00000'
> sort 'c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper_part-00000'
> sort 'c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper_part-00000'
> sort 'c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper_part-00000'
> sort 'c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper_part-00000'
> sort 'c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper_part-00000'
> sort 'c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper_part-00000'
> sort 'c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper_part-00000'
> sort 'c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper_part-00000'
> sort 'c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper_part-00000'
> sort 'c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper_part-00000'
> sort 'c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper_part-00000'
> sort 'c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper_part-00000'
> sort 'c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper_part-00000'
INFO:mrjob.runner:> sort 'c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-mapper_part-00000'
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-reducer_part-00000
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-reducer_part-00000
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-reducer_part-00000
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-reducer_part-00000
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-reducer_part-00000
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-reducer_part-00000
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-reducer_part-00000
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-reducer_part-00000
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-reducer_part-00000
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-reducer_part-00000
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-reducer_part-00000
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-reducer_part-00000
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-reducer_part-00000
writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-reducer_part-00000
INFO:mrjob.sim:writing to c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-reducer_part-00000
Counters from step 1:
Counters from step 1:
Counters from step 1:
Counters from step 1:
Counters from step 1:
Counters from step 1:
Counters from step 1:
Counters from step 1:
Counters from step 1:
Counters from step 1:
Counters from step 1:
Counters from step 1:
Counters from step 1:
Counters from step 1:
INFO:mrjob.runner:Counters from step 1:
(no counters found)
(no counters found)
(no counters found)
(no counters found)
(no counters found)
(no counters found)
(no counters found)
(no counters found)
(no counters found)
(no counters found)
(no counters found)
(no counters found)
(no counters found)
(no counters found)
INFO:mrjob.runner: (no counters found)
Moving c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-reducer_part-00000 -> c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\output\part-00000
Moving c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-reducer_part-00000 -> c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\output\part-00000
Moving c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-reducer_part-00000 -> c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\output\part-00000
Moving c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-reducer_part-00000 -> c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\output\part-00000
Moving c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-reducer_part-00000 -> c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\output\part-00000
Moving c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-reducer_part-00000 -> c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\output\part-00000
Moving c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-reducer_part-00000 -> c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\output\part-00000
Moving c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-reducer_part-00000 -> c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\output\part-00000
Moving c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-reducer_part-00000 -> c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\output\part-00000
Moving c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-reducer_part-00000 -> c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\output\part-00000
Moving c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-reducer_part-00000 -> c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\output\part-00000
Moving c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-reducer_part-00000 -> c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\output\part-00000
Moving c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-reducer_part-00000 -> c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\output\part-00000
Moving c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-reducer_part-00000 -> c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\output\part-00000
INFO:mrjob.sim:Moving c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\step-0-reducer_part-00000 -> c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\output\part-00000
Streaming final output from c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\output
Streaming final output from c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\output
Streaming final output from c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\output
Streaming final output from c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\output
Streaming final output from c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\output
Streaming final output from c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\output
Streaming final output from c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\output
Streaming final output from c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\output
Streaming final output from c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\output
Streaming final output from c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\output
Streaming final output from c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\output
Streaming final output from c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\output
Streaming final output from c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\output
Streaming final output from c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\output
INFO:mrjob.runner:Streaming final output from c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000\output
['1', '10']
['2', '11']
['3', '3']
['4', '12']
['5', '4']
['6', '1']
['7', '1']
['8', '41']
['9', '532']
['10', '2']
['11', '0']
"0" "11"
"1" "6"
"10" "1"
"11" "2"
"12" "4"
"2" "10"
"3" "3"
"4" "5"
"41" "8"
"532" "9"
removing tmp directory c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000
removing tmp directory c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000
removing tmp directory c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000
removing tmp directory c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000
removing tmp directory c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000
removing tmp directory c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000
removing tmp directory c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000
removing tmp directory c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000
removing tmp directory c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000
removing tmp directory c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000
removing tmp directory c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000
removing tmp directory c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000
removing tmp directory c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000
removing tmp directory c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000
INFO:mrjob.runner:removing tmp directory c:\users\ps\appdata\local\temp\MRSortByString.PS.20150922.021119.320000
In [32]:
%run code/MRSortByInt.py data/numbers.txt
File "c:\Users\PS\Documents\GitHub\big-data-python-class\Lectures\Lecture5-MapReduce\code\MRSortByInt.py", line 14
if __name__ == '__main__':
^
SyntaxError: invalid syntax
sortdata.txt
1 1 2 4 3 8 4 2 4 7 5 5 6 10 7 11
In [1]:
# -*- coding: utf-8 -*-
# Testing word frequency count
from MRSortByString import *
from mrjob.job import MRJob
'''
This is a simple wrapper that runs mrjob MapReduce jobs, the inputs are:
MRJobClass - the class of the job to be run
argsArr - an array of strings to be used when creating the MRJob.
@author: Peter Harrington if you have any questions: peter.b.harrington@gmail.com
'''
def runJob(MRJobClass, argsArr, loc='local'):
if loc == 'emr':
argsArr.extend(['-r', 'emr'])
print "starting %s job on %s" % (MRJobClass.__name__, loc)
mrJob = MRJobClass(args=argsArr)
runner = mrJob.make_runner()
runner.run()
print "finished %s job" % MRJobClass.__name__
return mrJob, runner
def runParallelJob(MRJobClass, argsArr): #TO DO: add threading to allow jobs to run in
pass #parallel
#launch a new thread
#call runJob(MRJobClass, argsArr) on the new thread
if __name__ == '__main__':
# pass in file from outside
# MRWordFrequencyCount.run()
#setup file here
mr_job, runner = runJob(MRSortByString,["C:\data\sortdata.txt"],"local")
print "Sorting sortdata.txt"
for line in runner.stream_output():
key, value = mr_job.parse_output_line(line)
print "%s: %s "%(key,value)
WARNING:mrjob.job:mr() is deprecated and will be removed in v0.6.0. Use mrjob.step.MRStep directly instead.
WARNING:mrjob.job:mr() is deprecated and will be removed in v0.6.0. Use mrjob.step.MRStep directly instead.
WARNING:mrjob.job:mr() is deprecated and will be removed in v0.6.0. Use mrjob.step.MRStep directly instead.
WARNING:mrjob.job:mr() is deprecated and will be removed in v0.6.0. Use mrjob.step.MRStep directly instead.
WARNING:mrjob.job:mr() is deprecated and will be removed in v0.6.0. Use mrjob.step.MRStep directly instead.
starting MRSortByString job on local
finished MRSortByString job
Sorting sortdata.txt
1: 1
10: 6
11: 7
2: 4
4: 2
5: 5
7: 4
8: 3
Note the second column is reported by their string values
Content source: MysariRaghav/big-data-python-class
Similar notebooks: