问题遇到的现象和发生背景
在一个有4台服务器的系统(zookeeper负责协调)中创建hbase表后,上传jar包处理表的内容。
问题相关代码
pom.xml部分:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.example</groupId>
<artifactId>MyHBase</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<hadoop.version>2.7.7</hadoop.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase</artifactId>
<version>2.0.2</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-mapreduce</artifactId>
<version>2.0.2</version>
</dependency>
</dependencies>
</project>
MemberMapper.java部分:
package org.szc2019211234.hbase.inputSource;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import java.io.IOException;
public class MemberMapper extends TableMapper<Writable, Writable> {
private Text k = new Text();
private Text v = new Text();
public static final String FIELD_COMMOM_separator = "\u0001";
@Override
protected void setup(Context context) throws IOException,InterruptedException{}
@Override
protected void map(ImmutableBytesWritable row, Result columns, Context context) throws IOException,InterruptedException{
String value = null;
String rowkey = new String(row.get());
byte[] columnFamily = null;
byte[] columnQualifier = null;
long ts = 0L;
try{
for(Cell cell : columns.listCells()){
value = Bytes.toStringBinary(cell.getValueArray());
columnFamily = cell.getFamilyArray();
columnQualifier = cell.getQualifierArray();
ts = cell.getTimestamp();
k.set(rowkey);
v.set(Bytes.toString(columnFamily)+FIELD_COMMOM_separator+Bytes.toString(columnQualifier)+FIELD_COMMOM_separator+
value+FIELD_COMMOM_separator+ts);
context.write(k, v);
}
}catch(Exception e){
e.printStackTrace();
System.err.println("Error:"+e.getMessage()+",Row:"+Bytes.toString(row.get())+",Value"+value);
}
}
}
Main.java部分:
package org.szc2019211234.hbase.inputSource;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
public class Main {
static final Log LOG = LogFactory.getLog(Main.class);
public static final String NAME = "Member Test1";
public static final String TEMP_INDEX_PATH = "HDFS://szc-2019211234-0001:8020/tmp/2019211234szc";
public static String inputTable = "2019211234szc";
public static void main(String[] args) throws Exception {
Configuration conf = HBaseConfiguration.create();
conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
Scan scan = new Scan();
scan.setBatch(0);
scan.setCaching(10000);
scan.setMaxVersions();
scan.setTimeRange(System.currentTimeMillis() - 3*24*3600*1000L,System.currentTimeMillis());
scan.addColumn(Bytes.toBytes("cf1"),Bytes.toBytes("keyword"));
conf.setBoolean("mapred.map.tasks.speculative.execution",false);
conf.setBoolean("mapred.reduce.tasks.speculative.execution",false);
Path tmpIndexPath = new Path(TEMP_INDEX_PATH);
FileSystem fs =FileSystem.get(conf);
if(fs.exists(tmpIndexPath)){
fs.delete(tmpIndexPath,true);
}
Job job = new Job(conf,NAME);
job.setJarByClass(Main.class);
TableMapReduceUtil.initTableMapperJob(inputTable,scan,MemberMapper.class, Text.class,Text.class,job);
job.setNumReduceTasks(0);
job.setOutputFormatClass(TextOutputFormat.class);
FileOutputFormat.setOutputPath(job,tmpIndexPath);
boolean success = job.waitForCompletion(true);
System.exit(success?0:1);
}
}
运行结果及报错内容
上传以上代码生成的jar包后执行命令:
hadoop jar MyHBase.jar org.szc2019211234.hbase.inputSource.Main
运行报错如下所示:
22/03/31 22:47:51 INFO mapreduce.Job: Job job_1648729191766_0002 failed with state FAILED due to: Application application_1648729191766_0002 failed 2 times due to AM Container for appattempt_1648729191766_0002_000002 exited with exitCode: 1
For more detailed output, check application tracking page:http://szc-2019211234-0001:8088/cluster/app/application_1648729191766_0002Then, click on links to logs of each attempt.
Diagnostics: Exception from container-launch.
Container id: container_1648729191766_0002_02_000001
Exit code: 1
Stack trace: ExitCodeException exitCode=1:
at org.apache.hadoop.util.Shell.runCommand(Shell.java:585)
at org.apache.hadoop.util.Shell.run(Shell.java:482)
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:776)
at org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:212)
at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:302)
at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:82)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:823)
查看生成的错误日志,如下所示(引用模式一直报错故只能用java模板显示):
Container exited with a non-zero exit code 1
Failing this attempt. Failing the application.
22/03/31 22:47:51 INFO mapreduce.Job: Counters: 0
2022-04-23 22:39:13,997 INFO [main] org.apache.hadoop.service.AbstractService: Service org.apache.hadoop.mapreduce.v2.app.MRAppMaster failed in state INITED; cause: org.apache.hadoop.yarn.exceptions.YarnRuntimeException: java.io.IOException: No FileSystem for scheme: HDFS
org.apache.hadoop.yarn.exceptions.YarnRuntimeException: java.io.IOException: No FileSystem for scheme: HDFS
at org.apache.hadoop.mapreduce.v2.app.MRAppMaster$2.call(MRAppMaster.java:518)
at org.apache.hadoop.mapreduce.v2.app.MRAppMaster$2.call(MRAppMaster.java:498)
at org.apache.hadoop.mapreduce.v2.app.MRAppMaster.callWithJobClassLoader(MRAppMaster.java:1593)
at org.apache.hadoop.mapreduce.v2.app.MRAppMaster.createOutputCommitter(MRAppMaster.java:498)
at org.apache.hadoop.mapreduce.v2.app.MRAppMaster.serviceInit(MRAppMaster.java:284)
at org.apache.hadoop.service.AbstractService.init(AbstractService.java:163)
at org.apache.hadoop.mapreduce.v2.app.MRAppMaster$5.run(MRAppMaster.java:1551)
at java.security.AccessController.doPrivileged(AccessController.java:774)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1762)
at org.apache.hadoop.mapreduce.v2.app.MRAppMaster.initAndStartAppMaster(MRAppMaster.java:1548)
at org.apache.hadoop.mapreduce.v2.app.MRAppMaster.main(MRAppMaster.java:1481)
Caused by: java.io.IOException: No FileSystem for scheme: HDFS
at org.apache.hadoop.fs.FileSystem.getFileSystemClass(FileSystem.java:2658)
at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2665)
at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:93)
at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2701)
at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2683)
at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:372)
at org.apache.hadoop.fs.Path.getFileSystem(Path.java:295)
at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.<init>(FileOutputCommitter.java:113)
at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.<init>(FileOutputCommitter.java:88)
at org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.getOutputCommitter(FileOutputFormat.java:309)
at org.apache.hadoop.mapreduce.v2.app.MRAppMaster$2.call(MRAppMaster.java:516)
... 11 more
End of LogType:syslog
我的解答思路和尝试过的方法
jps检测结果符合预期,zookeeper状态输出无异常,hdfs检测也未见明显异常。初步排除服务器自身问题。
根据网上对"No FileSystem for scheme: HDFS"的解决思路,在Main函数中添加了语句:
conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
仍然未见明显效果。
另:
本次使用的hadoop版本号为2.7.7,在服务器的home/modules目录下有2.7.7版本和2.8.3两个版本的hadoop(hadoop路径仍然设置为2.7.7版本所在);在pom导入依赖时报错显示如下(但是主界面未标红且能够正常编译并生成jar包):
尚不清楚错误是否与此二者有关。
我想要达到的结果
希望能够找出错误所在并成功更正,使得hadoop命令能够顺利执行。