In [4]:
from mrjob.job import MRJob

class MRSortByString(MRJob):
    """MapReduce job that re-keys each input record by its second field.

    Every input line is expected to hold at least two whitespace-separated
    fields.  The second field becomes the output key and the first field the
    output value.  Keys are left as strings, so the ordering seen in the job
    output is by string value (e.g. "10" before "2"), not numeric.
    """

    def mapper(self, _, line):
        """Emit ``(second_field, first_field)`` for one input line.

        Lines with fewer than two fields (including blank lines) are skipped
        instead of raising ``IndexError``.
        """
        # split() with no argument tolerates tabs, repeated spaces and
        # leading/trailing whitespace; split(' ') would produce empty
        # fields for those and pick the wrong columns.
        fields = line.split()
        if len(fields) < 2:
            return
        yield fields[1], fields[0]

    def reducer(self, key, val):
        """Emit one ``(key, value)`` pair for every value seen for ``key``.

        Yielding only the first value would silently drop records that
        share the same key.
        """
        for v in val:
            yield key, v


if __name__ == '__main__':
    # Only announces where the job class lives; the job itself is started by
    # a separate driver, so MRSortByString.run() is intentionally not called.
    # A parenthesised single-argument print behaves the same on Python 2 and
    # also parses on Python 3.
    print(" stored in MRSortByString.py")


 stored in MRSortByString.py

sortdata.txt

1 1 2 4 3 8 4 2 4 7 5 5 6 10 7 11


In [1]:
# -*- coding: utf-8 -*-
# Testing the MRSortByString job (sorts records by their second field)
from MRSortByString import *
from mrjob.job import MRJob
'''
This is a simple wrapper that runs mrjob MapReduce jobs, the inputs are:
MRJobClass - the class of the job to be run
argsArr - an array of strings to be used when creating the MRJob.
@author: Peter Harrington  if you have any questions: peter.b.harrington@gmail.com
'''
def runJob(MRJobClass, argsArr, loc='local'):
    """Build an mrjob job from ``MRJobClass``, run it, and return it.

    Args:
        MRJobClass: the job class to instantiate; it is called as
            ``MRJobClass(args=...)``.
        argsArr: list of command-line style string arguments for the job.
            The list is not modified.
        loc: when equal to ``'emr'`` the arguments ``-r emr`` are appended;
            any other value leaves the arguments unchanged.

    Returns:
        A ``(job, runner)`` tuple.  The runner has already been run and is
        handed back so the caller can read the job's output from it.
    """
    # Work on a copy: extending argsArr in place would leak '-r emr' into the
    # caller's list and duplicate it if the same list were reused.
    jobArgs = list(argsArr)
    if loc == 'emr':
        jobArgs.extend(['-r', 'emr'])
    print("starting %s job on %s" % (MRJobClass.__name__, loc))
    mrJob = MRJobClass(args=jobArgs)
    runner = mrJob.make_runner()
    runner.run()
    print("finished %s job" % MRJobClass.__name__)
    return mrJob, runner
    
def runParallelJob(MRJobClass, argsArr):
    """Placeholder for running a job in parallel; currently does nothing.

    TODO: add threading so jobs can run in parallel:
      - launch a new thread
      - call runJob(MRJobClass, argsArr) on the new thread
    """
    return None

if __name__ == '__main__':
    # Raw string keeps the Windows backslashes literal; the plain literal only
    # worked because "\d" and "\s" happen not to be recognised escapes.
    # TODO: pass the input file in from outside instead of hard-coding it.
    inputPath = r"C:\data\sortdata.txt"
    mr_job, runner = runJob(MRSortByString, [inputPath], "local")
    print("Sorting sortdata.txt")
    # Decode each raw output line back into its (key, value) pair.
    for line in runner.stream_output():
        key, value = mr_job.parse_output_line(line)
        print("%s: %s " % (key, value))


WARNING:mrjob.job:mr() is deprecated and will be removed in v0.6.0. Use mrjob.step.MRStep directly instead.
WARNING:mrjob.job:mr() is deprecated and will be removed in v0.6.0. Use mrjob.step.MRStep directly instead.
WARNING:mrjob.job:mr() is deprecated and will be removed in v0.6.0. Use mrjob.step.MRStep directly instead.
WARNING:mrjob.job:mr() is deprecated and will be removed in v0.6.0. Use mrjob.step.MRStep directly instead.
WARNING:mrjob.job:mr() is deprecated and will be removed in v0.6.0. Use mrjob.step.MRStep directly instead.
starting MRSortByString job on local
finished MRSortByString job
Sorting sortdata.txt
1: 1 
10: 6 
11: 7 
2: 4 
4: 2 
5: 5 
7: 4 
8: 3 

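Note that the output is ordered by the second column of the input, and that the values are compared as strings rather than as numbers, which is why "10" and "11" appear before "2".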