package com.position.clean;
import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONObject;
/**
* @author zyb
* @date 2021/6/15 - 17:28
*/
/**
 * String helpers that flatten crawled job postings (JSON) into comma-separated text rows.
 */
public class CleanJob {

    /** Separator placed between the individual labels of one output column. */
    private static final char LABEL_SEPARATOR = '-';

    /**
     * Delimiters between fragments of the free-text "positionAdvantage" field: one of
     * {@code ; , 、 /} followed by optional whitespace.
     * NOTE(review): the previous pattern started with an empty alternative ("|;|..."), which
     * made split() cut the text into single characters. A delimiter character may have been
     * lost there (e.g. a full-width comma/semicolon or a space) - confirm against real data.
     */
    private static final String POSITION_DELIMITERS = "[;,、/]\\s*";

    /**
     * Returns a copy of {@code str} with every occurrence of {@code delchar} removed.
     *
     * @param str     text to filter; must not be {@code null}
     * @param delchar character to drop
     * @return {@code str} without any {@code delchar} characters
     */
    public static String deleteString(String str, char delchar) {
        StringBuilder kept = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); i++) {
            char current = str.charAt(i);
            if (current != delchar) {
                kept.append(current);
            }
        }
        return kept.toString();
    }

    /**
     * Merges the company labels and the fragments of the position text into one
     * {@code '-'}-separated string. Company labels come first; each position fragment has
     * its punctuation stripped, and fragments that end up empty are skipped.
     *
     * @param position free-text advantages, split on {@link #POSITION_DELIMITERS};
     *                 {@code null} or empty contributes nothing
     * @param company  company labels, appended in order
     * @return the merged labels, or an empty string when there is nothing to merge
     * @throws JSONException if an element of {@code company} cannot be read
     */
    public static String mergeString(String position, JSONArray company) throws JSONException {
        StringBuilder merged = new StringBuilder();
        appendAll(merged, company);
        // isEmpty() instead of reference comparison: `position != ""` is true for any
        // non-interned empty string.
        if (position != null && !position.isEmpty()) {
            for (String fragment : position.split(POSITION_DELIMITERS)) {
                String cleaned = fragment.replaceAll("[\\pP\\p{Punct}]", "");
                if (!cleaned.isEmpty()) {
                    merged.append(cleaned).append(LABEL_SEPARATOR);
                }
            }
        }
        return dropTrailingSeparator(merged);
    }

    /**
     * Joins all elements of {@code killData} with {@code '-'}.
     *
     * @param killData labels to join
     * @return the joined labels, or an empty string for an empty array
     * @throws JSONException if an element cannot be read
     */
    public static String killResult(JSONArray killData) throws JSONException {
        StringBuilder joined = new StringBuilder();
        appendAll(joined, killData);
        return dropTrailingSeparator(joined);
    }

    /**
     * Converts every job object in {@code jobdata} into one row of the form
     * {@code city,salary,welfare,skills} and joins the rows with newlines (no trailing
     * newline). The character {@code 'k'} is removed from the salary text.
     *
     * @param jobdata array of job objects carrying the keys {@code city}, {@code salary},
     *                {@code positionAdvantage}, {@code skillLables} and
     *                {@code companyLabelList}
     * @return the rows, or an empty string for an empty array
     * @throws JSONException if an element is not a JSON object or lacks a required key
     */
    public static String resultTOString(JSONArray jobdata) throws JSONException {
        StringBuilder rows = new StringBuilder();
        for (int i = 0; i < jobdata.length(); i++) {
            if (i > 0) {
                // A real line break; the previous code appended the literal text "/n".
                rows.append('\n');
            }
            JSONObject job = toJsonObject(jobdata.get(i));
            String city = job.getString("city");
            String salary = deleteString(job.getString("salary"), 'k');
            String welfare = mergeString(
                    job.getString("positionAdvantage"), job.getJSONArray("companyLabelList"));
            String skills = killResult(job.getJSONArray("skillLables"));
            rows.append(city).append(',')
                    .append(salary).append(',')
                    .append(welfare).append(',')
                    .append(skills);
        }
        return rows.toString();
    }

    /** Appends every element of {@code items} to {@code out}, each followed by the separator. */
    private static void appendAll(StringBuilder out, JSONArray items) throws JSONException {
        for (int i = 0; i < items.length(); i++) {
            out.append(items.get(i)).append(LABEL_SEPARATOR);
        }
    }

    /** Removes the single trailing separator, if any, and returns the resulting text. */
    private static String dropTrailingSeparator(StringBuilder joined) {
        if (joined.length() > 0) {
            joined.setLength(joined.length() - 1);
        }
        return joined.toString();
    }

    /** Returns {@code element} itself when it already is a JSON object, else parses its text form. */
    private static JSONObject toJsonObject(Object element) throws JSONException {
        return element instanceof JSONObject
                ? (JSONObject) element
                : new JSONObject(element.toString());
    }
}
package com.position.clean;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONObject;
import java.io.IOException;
/**
* @author zyb
* @date 2021/6/15 - 17:28
*/
/**
 * Map-side cleaning step: extracts the JSON payload from one crawled line, walks down to
 * {@code content.positionResult.result} and emits the rows produced by
 * {@link CleanJob#resultTOString} as the output key (the value is always {@link NullWritable}).
 */
public class CleanMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    /** Counter group under which skipped records are reported. */
    private static final String COUNTER_GROUP = "CleanMapper";

    /** Reused output key, so no new Text is allocated per record. */
    private final Text outputKey = new Text();

    /**
     * Cleans one input line. The payload is the text after the second {@code '='} of the line,
     * minus the line's last character. Lines that are too short for that slice, or whose
     * payload is not the expected JSON, are skipped and counted instead of failing the task.
     *
     * @param key     offset of the line in the input (used only in the skip message)
     * @param value   the raw crawled line
     * @param context task context used for output and counters
     * @throws IOException          if writing the output record fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String reptileData = value.toString();
        int firstEquals = reptileData.indexOf('=');
        int start = reptileData.indexOf('=', firstEquals + 1) + 1;
        int end = reptileData.length() - 1;
        if (start > end) {
            // Empty or truncated line: substring(start, end) would throw and kill the task.
            context.getCounter(COUNTER_GROUP, "MALFORMED_LINES").increment(1);
            return;
        }
        String jobData = reptileData.substring(start, end);
        try {
            JSONObject contentJson = child(new JSONObject(jobData), "content");
            JSONObject resultJson = child(contentJson, "positionResult");
            JSONArray resultData = resultJson.getJSONArray("result");
            String jobResultData = CleanJob.resultTOString(resultData);
            if (jobResultData.isEmpty()) {
                // No postings in this record; do not emit a blank output line.
                context.getCounter(COUNTER_GROUP, "EMPTY_RESULTS").increment(1);
                return;
            }
            outputKey.set(jobResultData);
            context.write(outputKey, NullWritable.get());
        } catch (JSONException e) {
            // Best effort: skip the bad record, but make the loss visible in the job counters
            // and in the task's stderr log.
            context.getCounter(COUNTER_GROUP, "INVALID_JSON").increment(1);
            System.err.println("Skipping record at offset " + key + ": " + e);
        }
    }

    /**
     * Returns the value stored under {@code name} as a JSON object, parsing its text form
     * when it is not one already.
     */
    private static JSONObject child(JSONObject parent, String name) throws JSONException {
        Object node = parent.get(name);
        return node instanceof JSONObject
                ? (JSONObject) node
                : new JSONObject(node.toString());
    }
}
package com.position.clean;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.log4j.BasicConfigurator;
import java.io.IOException;
/**
* @author zyb
* @date 2021/6/15 - 17:28
*/
/**
 * Driver that configures and submits the cleaning job with {@link CleanMapper} as mapper.
 */
public class CleanMain {

    /** Input path used when no command-line argument is given. */
    private static final String DEFAULT_INPUT = "hdfs://centos01:9000//JobData//20210615";

    /**
     * Output path used when fewer than two command-line arguments are given.
     * NOTE(review): this path has no scheme, so Hadoop resolves it against the configured
     * default file system - confirm that this is the local disk when running from the IDE,
     * and that the directory does not exist yet when the job starts.
     */
    private static final String DEFAULT_OUTPUT = "E:/IDEA/out1";

    /**
     * Submits the job and exits with status 0 on success, 1 on failure.
     *
     * @param args optional: {@code args[0]} is the input path, {@code args[1]} the output
     *             path; missing arguments fall back to the built-in defaults
     * @throws IOException            if the job cannot be set up or submitted
     * @throws ClassNotFoundException if a job class cannot be resolved
     * @throws InterruptedException   if waiting for completion is interrupted
     */
    public static void main(String[] args) throws IOException,
            ClassNotFoundException, InterruptedException {
        BasicConfigurator.configure();

        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT;

        Configuration conf = new Configuration();
        // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
        Job job = Job.getInstance(conf, "job");
        job.setJarByClass(CleanMain.class);
        job.setMapperClass(CleanMapper.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        FileInputFormat.addInputPath(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
在 IDEA 中运行时作业跑不起来：本地测试不会生成输出文件。
程序不会报错，但会出现上述情况，求大佬们指点，谢谢了。