"""A more advanced Reducer, using Python iterators and generators."""
import sys
from itertools import groupby
from operator import itemgetter
def read_mapper_output(file, separator='\t'):
    """Lazily split each input line into a ``[key, value]`` pair.

    Args:
        file: Any iterable of text lines (for example ``sys.stdin``).
        separator: Delimiter between key and value. Only the first
            occurrence is used, so the value may itself contain it.

    Yields:
        list: ``[key, value]`` for each line, or the one-element list
        ``[line]`` when the line does not contain ``separator``.
    """
    for line in file:
        # Strip only the line terminator. A bare rstrip() would also eat a
        # trailing separator (tab/space), collapsing a record whose value is
        # empty into a single field.
        yield line.rstrip('\r\n').split(separator, 1)
def main(separator='\t'):
    """Sum the counts per word read from stdin and print one total per word.

    Each input line is expected to look like ``word<separator>count``. Runs
    of consecutive lines sharing the same word are summed and written to
    standard output as ``word<separator>total``.

    Args:
        separator: Delimiter used both to parse the input lines and to
            format the output lines.
    """
    data = read_mapper_output(sys.stdin, separator=separator)
    # groupby() only merges *adjacent* records with equal keys, so the same
    # word appearing in two separate runs produces two output lines.
    for current_word, group in groupby(data, itemgetter(0)):
        try:
            # `_` avoids rebinding current_word inside the generator.
            total_count = sum(int(count) for _, count in group)
        except ValueError:
            # Deliberate best-effort: a record without a separator or with a
            # non-numeric count causes the whole group to be skipped.
            continue
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("%s%s%d" % (current_word, separator, total_count))
# Run the reducer only when executed as a script, not when imported.
if __name__ == "__main__":
    main()