问题遇到的现象和发生背景
编写爬虫类的时候,代码中的 WriteLocal、SaveData 总是标红
用代码块功能插入代码,请勿粘贴截图
import cn.edu.hfut.dmic.webcollector.model.CrawlDatums;
import cn.edu.hfut.dmic.webcollector.model.Page;
import cn.edu.hfut.dmic.webcollector.plugin.ram.RamCrawler;
import org.json.*;
import java.io.*;
import java.text.SimpleDateFormat;
import java.util.*;
/**
 * Crawls the paginated review JSON of one product, writes every non-empty
 * page to a local file via {@code WriteLocal} and hands it to {@code Parser}.
 *
 * <p>Each instance is seeded with exactly one URL; {@link #main(String[])}
 * creates one instance per review page. Shared crawl settings live in the
 * static fields below and are filled in by {@code main} before any crawl starts.
 */
public class EvaluationInfoCrawler extends RamCrawler {

    /** Divisor used by the paging arithmetic (reviews per page). */
    private static final double COMMENTS_PER_PAGE = 10.0;

    /** Extra URLs crawled after the regular pages; nothing in this class adds to it. */
    static List<String> urlArr = new ArrayList<String>();
    /** Platform label passed to the parser. */
    static String platform;
    /** Phone model, taken from the command line. */
    static String model;
    /** Timestamp of this run, also used as the output file name prefix. */
    static String crawlTime;
    /** Product price, added to every stored review page. */
    static double price;
    /** Parser that receives every stored review page. */
    static Parser infoParser;

    /**
     * Creates a crawler seeded with a single URL.
     *
     * @param url the page to fetch
     */
    public EvaluationInfoCrawler(String url) {
        this.addSeed(url);
    }

    /**
     * Handles one fetched page: if the body is JSON with a non-empty
     * {@code comments} array, the JSON (plus price) is written locally and parsed.
     *
     * @param page the fetched page
     * @param next follow-up tasks (unused)
     */
    public void visit(Page page, CrawlDatums next) {
        String doc = page.html();
        if (doc == null || doc.isEmpty()) {
            return;
        }
        JSONObject json = new JSONObject(doc);
        // append() stores the value inside a JSON array under "price".
        json.append("price", price);
        // optJSONArray returns null when the key is missing or is not an array,
        // so the null check must come before isNull(0).
        JSONArray comments = json.optJSONArray("comments");
        if (comments != null && !comments.isNull(0)) {
            String payload = json.toString();
            WriteLocal.write(payload);
            infoParser.start(payload, platform, model, crawlTime);
        }
    }

    /**
     * Command-line entry point.
     *
     * @param args {@code <skId> <model> <localPath>}, optionally followed by
     *             {@code <dbSrvIP>} or by {@code <dbSrvIP> <dbUser> <pwd>}
     * @throws Exception if crawling, storage or writing the flag file fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length != 3 && args.length != 4 && args.length != 6) {
            printUsage();
            System.exit(2);
        }
        String skId = args[0]; // product id (the skId part of the product URL)
        EvaluationInfoCrawler.model = args[1]; // phone model
        String localDir = args[2]; // local directory for crawled data

        File dir = new File(localDir);
        // mkdirs() returns false when the directory cannot be created, e.g.
        // because the path already exists as a regular file.
        if (!dir.isDirectory() && !dir.mkdirs()) {
            System.err.println("Cannot create output directory: " + localDir);
            System.exit(2);
        }

        EvaluationInfoCrawler.platform = "京东";
        EvaluationInfoCrawler.crawlTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
        EvaluationInfoCrawler.infoParser = new Parser();
        EvaluationInfoCrawler.price = new PriceCrawler(skId).getPrice();
        int number = new EvaluateCountCrawler(skId).getNumber();
        // Kept as a double so the SUCCESS record keeps its "N.0" formatting.
        double pageCount = Math.ceil(number / COMMENTS_PER_PAGE);

        openDatabase(args);
        try {
            // NOTE(review): the literal contains a space after the underscore;
            // kept unchanged, confirm whether it is intended.
            WriteLocal.setWriter(localDir + File.separator + EvaluationInfoCrawler.crawlTime + "_ json.txt");
            try {
                // One crawler per review page.
                for (int page = 0; page < pageCount; page++) {
                    String nextUrl = "http://club.jd.com/productpage/p-" + skId + "-s-0-t-3-p-" + page + ".html";
                    new EvaluationInfoCrawler(nextUrl).start();
                }
                for (String url : urlArr) {
                    new EvaluationInfoCrawler(url).start();
                }
            } finally {
                WriteLocal.close();
            }
        } finally {
            SaveData.close();
        }

        // Only reached when everything above completed without an exception.
        writeSuccessFlag(localDir, EvaluationInfoCrawler.crawlTime + "\t" + skId + "\t" + pageCount + "\n");
    }

    /** Prints the three accepted command-line forms to standard error. */
    private static void printUsage() {
        System.err.println("Usage: java -jar EvaluationCrawler.jar <skId> <model> <localPath>");
        System.err.println("       java -jar EvaluationCrawler.jar <skId> <model> <localPath> <dbSrvIP>");
        System.err.println("       java -jar EvaluationCrawler.jar <skId> <model> <localPath> <dbSrvIP> <dbUser> <pwd>");
    }

    /** Opens the {@code SaveData} connection using the optional arguments after the first three. */
    private static void openDatabase(String[] args) throws Exception {
        if (args.length == 3) {
            SaveData.getConnection();
        } else if (args.length == 4) { // srvIP
            SaveData.getConnection(args[3]);
        } else { // srvIP, user, pwd (argument count was validated by main)
            SaveData.getConnection(args[3], args[4], args[5]);
        }
    }

    /** Appends one record to the SUCCESS file in {@code localDir}, closing the writer even on failure. */
    private static void writeSuccessFlag(String localDir, String record) throws IOException {
        BufferedWriter writer = new BufferedWriter(new FileWriter(new File(localDir, "SUCCESS"), true));
        try {
            writer.write(record);
        } finally {
            writer.close(); // close() also flushes
        }
    }
}