我在获取某个网站的图片时,被限制了
我已经事先获取到了所需图片的地址链接
然后逐个请求这些地址并把图片保存到本地磁盘,这一步也能成功
但是请求二三十次后就会被限制,报错为 Error: socket hang up,大概半小时后会解除限制
查了下这个错误,有人说在请求前加上 maxSockets 的限制就行了,但加了之后还是不行
另外也尝试了在请求头里伪造动态 IP(X-Forwarded-For)和随机 User-Agent,也还是不行,请问要怎么解决?
下面是详细代码
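// Image downloads fail with "Error: socket hang up".
// Capping maxSockets is a commonly suggested remedy for that error, so the global agents are limited to one socket here.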
var http = require('http');
var https = require('https');
http.globalAgent.maxSockets = 1;
https.globalAgent.maxSockets = 1;
var request = require('request');
var log = require('single-line-log').stdout;
var fs = require("fs");
var nodePath = require('path');
// Pool of browser User-Agent strings; randomHead() picks one per request.
// Every entry is unique so that each string is equally likely to be chosen.
const userAgents = [
    "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
    "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0) ,Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0b13pre) Gecko/20110307 Firefox/4.0b13pre",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)",
    "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
    "Opera/9.25 (Windows NT 5.1; U; en), Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9",
    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36"
];
// Build request header: browser identity.
// Returns one entry of `userAgents`, chosen at random.
function randomHead() {
    // Math.random() lies in [0, 1), so the floored product is always a valid
    // index in 0..length-1 and can never land one past the end of the array.
    return userAgents[Math.floor(Math.random() * userAgents.length)];
}
// Build request header: fake client address.
// Returns a dotted-quad string made of four random octets, each in 10..255.
function returnIp() {
    var LOW = 10;
    var HIGH = 255;
    var octets = [];
    for (var i = 0; i < 4; i++) {
        // (LOW - HIGH) is negative, so the value falls in (LOW, HIGH] before flooring.
        octets.push(Math.floor(Math.random() * (LOW - HIGH) + HIGH));
    }
    return octets.join(".");
}
// Work queue of image records; get_img() takes records from the front.
var arr = [{
imgs:[], // image links; each element is the link of one image
title:'' // title of the image set
}]; // holds a batch of records carrying image information
var urls = []; // the `imgs` link array of the record currently being processed
var index = 1; // cursor into `urls` while reading it; also used as the number in the saved file name
// NOTE(review): the cursor starts at 1 (and get_img() resets it to 1), so urls[0] is never requested — confirm that is intended.
var title = ''; // title of the record currently being processed
// Start the download chain.
get_img();
// Called repeatedly: takes the next image record off the front of the queue,
// loads its links and title into the shared state, resets the cursor and
// starts walking its links. Prints a completion message once the queue is empty.
function get_img(){
    var next = arr.shift();
    if (next != null) {
        index = 1;
        title = next.title;
        urls = next.imgs;
        get_img_url();
        return;
    }
    // Queue exhausted: the blank line moves past the single-line progress output.
    console.log('');
    console.log('获取数据结束');
}
// Called repeatedly: takes the next image link of the current record and hands
// it to get_manhua(); once the record has no links left, moves on to the next
// record through get_img().
function get_img_url(){
    // Step over missing (null/undefined) links in a loop instead of recursing.
    while (index < urls.length && urls[index] == null) {
        index++;
    }
    // `>=` rather than `==`: the cursor starts at 1, so for an empty link array
    // it is already past the end and an equality test would never match.
    if (index >= urls.length) {
        get_img();
        return;
    }
    // The file is numbered after the link's position in the array.
    get_manhua(urls[index], './static/' + title + '/' + index + '.jpg');
}
// Request one image from the remote server and save it to local disk, then
// schedule get_img_url() again after a delay to keep the request rate low.
//   url  - address of the image to fetch
//   path - destination file; its parent directory is created when missing
// Outcomes:
//   - transport error (e.g. "socket hang up", timeout): logged; `index` is left
//     unchanged, so the same image is requested again after the delay
//   - non-2xx HTTP status: logged, nothing is written, moves on to the next image
//   - local directory/file error: logged, moves on to the next image
//   - success: file written, moves on to the next image
function get_manhua(url, path){
    var DELAY_MS = 5000;    // pause between requests to lower the access frequency
    var TIMEOUT_MS = 30000; // give up on a stalled request instead of waiting forever

    log('正在获取' + title + ' ' + index + '.jpg');

    // Report a failure for the current image and schedule the next step.
    // `retry` keeps the cursor on the same image; otherwise it is skipped.
    function fail(reason, retry){
        console.log('');
        console.log('获取' + title + ' ' + index + '.jpg 时失败:'+reason);
        console.log('');
        if (!retry){
            index++;
        }
        setTimeout(get_img_url, DELAY_MS);
    }

    request({
        url:url,
        method: 'GET',
        encoding: null, // deliver the body as a Buffer so binary image data is not mangled
        timeout: TIMEOUT_MS,
        pool:{maxSockets:1}, // socket cap added as a suggested remedy for "socket hang up"
        headers: {// randomised client address and browser identity
            "User-Agent": randomHead(),
            // NOTE(review): this is only a header; the server still sees the real
            // connection address, so it is unlikely to evade IP-based limiting.
            "X-Forwarded-For": returnIp()
        }
    }, function (err, response, body) {
        if (err){
            fail(err, true);
            return;
        }
        // Do not store an error page (403/404/5xx ...) under a .jpg name.
        if (response.statusCode < 200 || response.statusCode >= 300){
            fail('HTTP ' + response.statusCode, false);
            return;
        }
        // Write only after the whole body arrived, so no partial file is left
        // behind, and turn disk errors into a log line instead of a crash.
        try {
            fs.mkdirSync(nodePath.dirname(path), {recursive: true});
            fs.writeFileSync(path, body);
        } catch (writeErr) {
            fail(writeErr, false);
            return;
        }
        index++;
        setTimeout(get_img_url, DELAY_MS);
    });
}