报错:
Error: java.lang.StringIndexOutOfBoundsException: String index out of range: 4
代码:
package mprd;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
public class zhengwenshuang_CityMapJoinDemo {
public static void main(String[] args) throws Exception{
//判断输入路径
if(args.length!=3 || args==null) {
System.err.println("Please Input Full Path!");
System.exit(1);
}
//创建job
Job job = Job.getInstance(new Configuration(),zhengwenshuang_CityMapJoinDemo.class.getSimpleName());
job.setJarByClass(zhengwenshuang_CityMapJoinDemo.class);
//通过MUltipleInputs多输入的方式添加多个Map的处理类
MultipleInputs.addInputPath(job,new Path(args[0]),
TextInputFormat.class,AllCity.class);
MultipleInputs.addInputPath(job, new Path(args[1]),
TextInputFormat.class,SomeCity.class);
FileOutputFormat.setOutputPath(job, new Path(args[2]));
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
//设置Reducer阶段的处理类
job.setReducerClass(CityReduce.class);
job.waitForCompletion(true);
}
//处理所有城市的map
// <济南,a_济南>
//<青岛,a_青岛>
//<德州,a_德州>
static class AllCity extends Mapper<LongWritable,Text,Text,Text>{
public static final String LABEL="a_";
protected void map(LongWritable key,Text value,org.apache.hadoop.mapreduce.Mapper<LongWritable, Text, Text,Text>.Context context)
throws java.io.IOException,InterruptedException{
String cityName=value.toString();
context.write(new Text(cityName),new Text(LABEL+cityName));
};
}
//处理只有飞机场的城市
// <济南,s_济南 济南飞机场>
//<青岛,s_青岛 青岛飞机场>
static class SomeCity extends Mapper<LongWritable,Text,Text,Text>{
public static final String LABEL="s_";
protected void map(LongWritable key,Text value,org.apache.hadoop.mapreduce.Mapper<LongWritable, Text, Text,Text>.Context context)
throws java.io.IOException,InterruptedException{
String[] lines = value.toString().split("\t");
String cityName=lines[0];
context.write(new Text(cityName),new Text(LABEL+value.toString()));
};
}
//经过shuffle之后变成:
//<济南,{a_济南,s_济南 济南飞机场}>
//<德州,{a_德州}>
//<青岛,{a_青岛,s_青岛 青岛飞机场}>
static class CityReduce extends Reducer<Text,Text,Text,Text>{
protected void reduce(Text key,java.lang.Iterable<Text>values,org.apache.hadoop.mapreduce.Reducer<Text,Text,Text,Text>.Context context)
throws java.io.IOException,InterruptedException{
//城市的名字
String cityName=null;
//存放符合条件过滤出来的城市
List<String> list=new ArrayList<String>();
for (Text value:values) {
//如果列表中包含有s_开头的数据,则表明该数据是已经有飞机场的城市
if(value.toString().startsWith(SomeCity.LABEL)) {
int index=value.toString().indexOf("_");
cityName=value.toString().substring(index+1,index+3);
}else if(value.toString().startsWith(AllCity.LABEL)) {
list.add(value.toString().substring(2));
}
}
//如果城市名为空并且list列表中有值,则列表中的值是符合条件的数据
if(cityName==null&& list.size()>0) {
for(String str : list) {
context.write(new Text(str), new Text(" "));
}
}
};
}
}
错误信息:
22/05/14 04:17:16 INFO mapreduce.Job: map 100% reduce 0%
22/05/14 04:17:34 INFO mapreduce.Job: Task Id : attempt_1652445884344_0010_r_000000_0, Status : FAILED
Error: java.lang.StringIndexOutOfBoundsException: String index out of range: 4
at java.lang.String.substring(String.java:1963)
at mprd.zhengwenshuang_CityMapJoinDemo$CityReduce.reduce(zhengwenshuang_CityMapJoinDemo.java:83)
at mprd.zhengwenshuang_CityMapJoinDemo$CityReduce.reduce(zhengwenshuang_CityMapJoinDemo.java:72)
at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:171)
at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:627)
at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:389)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1762)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
Container killed by the ApplicationMaster.
Container killed on request. Exit code is 143
Container exited with a non-zero exit code 143