hadoop 设置输出文件格式，job.setOutputFormatClass(SequenceFileOutputFormat.class); 报错

hadoop 2.9.2 版本
设置输出输出文件格式时，可以
job.setOutputFormatClass(TextOutputFormat.class); // 默认的输出组件
但是，job.setOutputFormatClass(SequenceFileOutputFormat.class); 这样时，会报错。看其他人的博客，可以这样设置。

错误提示：

The method setOutputFormatClass(Class<? extends OutputFormat>) in the type Job is not applicable for the arguments (Class)

package cn.edu360.mr.indexSequance;

import java.io.File;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;


/**
 * 构建倒排索引，产生结果; hello a.txt->3 b.txt->4
 * 
 * 分两步，
 * 一  
 *    map： 产生K： 单词+文档   V：次数（1）
 *    reduce： 产生  K:单词+文档  V：总次数
 *    
 *二
 *    map: 产生 K:单词    V：文档  在文档中出现的次数
 *    reduce ： 输出            单词              文档， 文档中出现的次数  
 *    
 *    
 * @author Administrator
 *
 */
public class IndexStep1 {


    /**
     * 分割单词，
     * 输出  单词+文档      次数 1
     * @author Administrator
     *
     */
    public static class IndexMapper1 extends Mapper<LongWritable, Text, Text, IntWritable> {

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

            // 获取maptask所处理的数据任务上下文信息：             文件路径， 偏移量范围
            //  若访问数据为数据库，则为：    库名， 表名, 行范围
            InputSplit inputSplit = context.getInputSplit();  // InputSplit为抽象类

            FileSplit fileSplit = (FileSplit) inputSplit;  // 强转inputSplit为FileSplit类型，   FileSplit为InputSplit的实现类，针对文件

            String filename = fileSplit.getPath().getName(); // 获取文件名

            String line = value.toString();
            String[] words = line.split(" ");
            for (String word : words) {

                word = format(word);
                if (word.length() >2) {
                    context.write(new Text(word + "-" + filename), new IntWritable(1));
                }

            }
        }

    }


    /**
     * 统计文档+单词
     * 
     * 输出： K：文档+单词   V： 单词在该文档中出现的总次数
     * @author Administrator
     *
     */
    public static class IndexReduce1 extends Reducer<Text, IntWritable, Text, IntWritable> {

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {

            int count = 0;  
            for (IntWritable value : values) {
                count = count + value.get();
            }   
            context.write(key, new IntWritable(count));
        }

    }





    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

        Configuration configuration = new Configuration();

        JobConf jobConf = new JobConf(configuration);

        Job job = Job.getInstance(configuration);

        job.setMapperClass(IndexMapper1.class);
        job.setReducerClass(IndexReduce1.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);


        File file = new File("d:\\testOut");
        if (file.exists()) {
            deleteDir(file);
        }


        job.setOutputFormatClass(TextOutputFormat.class);  // 默认的输出组件

        job.setOutputFormatClass(SequenceFileOutputFormat.class);



        FileInputFormat.setInputPaths(job, new Path("d:\\test1"));
        FileOutputFormat.setOutputPath(job, new Path("d:\\testOut"));

        job.waitForCompletion(true);




    }


    // 使用正则表达式去除标点符号,大写转小写
    public static String format(String s) {
        String str = s.replaceAll("\\pP|\\pS", "");
        return str.toLowerCase(); // 大写转小写
    }


    /**
     * 递归删除文件夹及文件下的所有文件
     * @param dir
     * @return
     */
    private static boolean deleteDir(File dir) {
        if (dir.isDirectory()) {
            String[] children = dir.list();
              //  递归删除目录中的子目录下
            for (int i=0; i<children.length; i++) {
                boolean success = deleteDir(new File(dir, children[i]));
                if (!success) {
                    return false;
                }
            }
        }
        // 目录此时为空，可以删除
        return dir.delete();
    }

}

写回答
好问题 0 提建议
关注问题
分享
邀请回答
编辑收藏删除结题
收藏举报

1条回答默认最新

关注

码龄粉丝数原力等级 --

被采纳

被点赞

采纳率
你知我知皆知 2024-08-10 11:06
关注
以下回答参考 皆我百晓生、券券喵儿 等免费微信小程序相关内容作答，并由本人整理回复。

根据您提供的代码和错误消息，SequenceFileOutputFormat 类型是 org.apache.hadoop.mapred.SequenceFileOutputFormat，而不是 org.apache.hadoop.mapred.TextOutputFormat。这意味着您正在尝试使用一个不支持的方法来设置输出格式。为了修复此问题，请确保您正确地设置了输出格式，并且方法名称匹配正确的类。

如果您的目标是将结果存储到序列化文件中，您可以尝试以下更改：

job.setOutputFormatClass(SequenceFileOutputFormat.class);

这应该解决错误并使程序正常运行。
解决无用
评论打赏
分享
举报

评论

按下Enter换行，Ctrl+Enter发表内容

报告相同问题？

关注问题

关于hadoop job中job.setOutputFormatClass和FileOutputFormat.setOutputPath的报错问题
2019-02-27 11:26

L.ZZ的博客 1. job.setOutputFormatClass(TextOutputFormat.class) # TextOutputFormat的为新的API中org.apache.hadoop.mapreduce.lib.output.TextOutputFormat job.setOutputFormatClass(TextOutputFormat.class); 2....
Hadoop支持的文件格式之SequenceFile
2019-12-20 14:24

邵奈一的博客文章目录0x00 文章内容0x01 SequenceFile格式概念1. SequenceFile是啥0x02 编码实现1.... 执行写SequenceFile文件格式代码3. 执行读SequenceFile文件格式代码4. 执行写SequenceFile文件格式代码...
Hadoop-MapReduce的自定义输入输出
2023-05-09 20:44

研发咨询顾问的博客因为读取的就是文本类型的，而默认TextIntputFormat,所以略写，*/获取job里面传递的输出目录，并且job的输出路径的产生下面自定义的文件名。如果输入有二进制,就使用下面二进制流,如果没有,就不用写,
hadoop配置文件，参数的优先级
2025-04-22 10:50

Re.月见草的博客参数优先级排序：（1）客户端代码中设置的值（2）ClassPath下的用户自定义的配置文件（project下的配置文件，例如/usr/local/hadoop/etc/hadoop/hdfs-site.xml）（3）服务器的自定义配置文件（XXX-site.xml路径为/...
java hadoop getmerge_hadoop文件合并
2021-03-16 15:56

水龙敬的博客众所周知，Hadoop对处理单个大文件比处理多个小文件更有效率，另外单个文件也非常占用HDFS的存储空间。所以往往要将其合并起来。1，getmergehadoop有一个命令行工具getmerge，用于将一组HDFS上的文件复制到本地...
hadoop---自定义输出文件格式以及输出到不同目录
2016-07-13 01:08

学战到底的博客转自： hadoop编程小技巧（7）---自定义输出文件格式以及输出到不同目录，保存在此以学习。代码测试环境：Hadoop2.4 应用场景：当需要定制输出数据格式时可以采用此技巧，包括定制输出数据的展现形式，输出路径...
CC00047.hadoop——|Hadoop&MapReduce.V20|——|Hadoop.v20|MapReduce综合案例.v01|
2022-04-07 14:13

yanqi_vip的博客 ~~~ 将数据输出到不同的文件目录下，数据内容如下，其中数据第九个字段表示好评， ~~~ 中评，差评。0：好评，1：中评，2：差评。 ~~~ 现需要根据好评，中评，差评把数据分类并输出到不同的目录中,并且要求按照时...
3.Hadoop之MapReduce
2022-04-03 21:26

hutc_Alan的博客 MapReduce是一个分布式运算程序的编程框架，用于用户开发“基于Hadoop的数据分析应用”的核心框架。 MapReduce核心功能是将用户编写的业务逻辑代码和自带默认组件整合成一个完整的分布式运算程序，并发运行在一个...
CC00045.hadoop——|Hadoop&MapReduce.V18|——|Hadoop.v18|InputFormat机制|自定义outputForma|
2022-04-07 14:12

yanqi_vip的博客一、[InputFormat机制之自定义outputFormat]：...~~~ OutputFormat:是MapReduce输出数据的基类， ~~~ 所有MapReduce的数据输出都实现了OutputFormat抽象类。 ~~~ 下面我们介绍几种常见的OutputFormat子类 ### --...
mapreduce替换默认的文本为sequence文件(sixteen day first)
2019-04-11 17:09

高辉的博客 sequence file（hadoop专设）存的是key-valuekey-valuekey-valuekey-value...普通文本文件需要拿字符串切自己想要的东西，而此文件可以直接读一对key-value ，中间环节产生的文件就可以用这种文件格式，下一个环...
没有解决我的问题, 去提问

hadoop 设置输出文件格式，job.setOutputFormatClass(SequenceFileOutputFormat.class); 报错

1条回答 默认 最新

1条回答默认最新