In [30]:
!head urls.txt


"/regwiz",1000
"/support",1001
"/athome",1002
"/kb",1003
"/search",1004
"/norge",1005
"/misc",1006
"/ie_intl",1007
"/msdownload",1008
"/windows",1009

In [50]:
%%writefile GlobalState.py

from mrjob.job import MRJob
import sys

class GlobalState(MRJob):
    def __init__(self):
        self.GlobalList = []
    
    def mapper_init(self):
        print("cat", file=sys.stderr)
    
    def mapper(self, _, lines):
        if "wi" in lines:
            self.GlobalList.append(lines[2:8])
        yield (lines, 1)
        
    def reducer(self, values, counts):
        pass
    
    def reducer_final(self):
        yield(self.GlobalList, 1)

    
if __name__ == "__main__":
    GlobalState.run()


Overwriting GlobalState.py

In [51]:
!head -n 10000 urls.txt | python GlobalState.py -r local -q


Traceback (most recent call last):
  File "GlobalState.py", line 25, in <module>
    GlobalState.run()
  File "/Users/BlueOwl1/anaconda/lib/python3.5/site-packages/mrjob/job.py", line 428, in run
    mr_job = cls(args=_READ_ARGS_FROM_SYS_ARGV)
TypeError: __init__() got an unexpected keyword argument 'args'