
统计一个或多个文件中各单词出现的次数
map阶段的代码
reduce阶段的代码
driver阶段的代码
测试结果
Map 阶段
def map_phase(filename, encoding="utf-8"):
    """Count how often each word occurs in one text file.

    The file is read line by line. Each line is split on runs of
    whitespace and every token is lower-cased before it is counted, so
    "Word" and "word" are tallied together. Punctuation attached to a
    token is kept as part of the token.

    Args:
        filename: Path of the text file to read.
        encoding: Text encoding used to decode the file. Defaults to
            UTF-8 so the result does not depend on the platform locale.

    Returns:
        A plain dict mapping each lower-cased word to its number of
        occurrences, keyed in first-seen order. Empty when the file
        contains no words.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the contents are not valid in `encoding`.
    """
    # Local import keeps this block self-contained.
    from collections import Counter

    word_count = Counter()
    with open(filename, "r", encoding=encoding) as file:
        for line in file:
            # split() without arguments already ignores leading and
            # trailing whitespace, so no separate strip() is needed.
            word_count.update(word.lower() for word in line.split())
    # Return a plain dict so callers see the same type as before
    # (a Counter would silently yield 0 for missing keys).
    return dict(word_count)
Reduce 阶段
def reduce_phase(mapped_data):
    """Merge several per-file word counts into one combined count.

    Args:
        mapped_data: Iterable of mappings from word to count, one
            mapping per processed input.

    Returns:
        A plain dict mapping each word to the sum of its counts across
        all mappings, keyed in first-seen order. Empty when
        `mapped_data` yields nothing.
    """
    # Local import keeps this block self-contained.
    from collections import Counter

    reduced_data = Counter()
    for word_count in mapped_data:
        # Counter.update adds the incoming counts to the running totals
        # instead of overwriting them the way dict.update would.
        reduced_data.update(word_count)
    # Return a plain dict so callers see the same type as before.
    return dict(reduced_data)
Driver 阶段
def driver_phase(filenames):
    """Run the map step on every file, then reduce the results.

    Each entry of `filenames` is passed to `map_phase`. The per-file
    results are collected into a list in input order and handed to
    `reduce_phase`, whose return value is returned unchanged.
    """
    per_file_counts = [map_phase(path) for path in filenames]
    return reduce_phase(per_file_counts)