在使用带参数的 GET 请求时报错,具体脚本如下:
import requests,os
class BaiduSpider(object):
    """Download the thumbnail images of a Baidu image album, page by page.

    The output directory ``<cwd>/图片/`` is created when the class body runs.
    Creating an instance prompts on stdin for an album name (used as the
    file-name prefix of every saved image) and for the number of pages.
    """

    # Directory that receives the images; the trailing slash is kept because
    # file names are appended to it by plain string concatenation.
    os_path = os.getcwd()+'/图片/'
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(os_path, exist_ok=True)

    # Seconds to wait on any single HTTP request before giving up; without a
    # timeout a stalled connection would block the script indefinitely.
    request_timeout = 10

    def __init__(self):
        """Store the request settings and read the album name / page count."""
        # NOTE(review): the reported run failed with "JSONDecodeError ... line 1
        # column 1 (char 0)", i.e. the body returned for this URL was not JSON.
        # Presumably this address serves the HTML album page and the JSON comes
        # from a separate data endpoint - TODO confirm in the browser's network
        # panel and point self.url at that endpoint.
        self.url = 'https://image.baidu.com/search/albumsdetail?'
        self.headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Safari/605.1.15'}
        self.input_name = input('请输入你想爬取的相册名称:<例如:建筑>')
        self.input_page = input('请输入你想爬取的页数:<例如:5>')

    def parse_requests_url(self):
        """Request every album page in turn and pass each response to the parser.

        Raises:
            ValueError: if the page count typed at the prompt is not an integer.
        """
        for page in range(int(self.input_page)):
            page_number = page + 1
            # NOTE(review): 'word', 'album_tab' and 'album_id' are hard-coded,
            # so the album name typed at the prompt only affects file names -
            # confirm whether it was meant to feed one of these parameters.
            params = {
                'pn': f'{30 * page_number}',
                'rn': '30',
                'tn': 'albumsdetail',
                'word': '人物',
                'album_tab': '建筑',
                'album_id': '3',
                'ic': '0',
                'curPageNum': f'{page_number}',
            }
            response = requests.get(
                self.url,
                headers=self.headers,
                params=params,
                timeout=self.request_timeout,
            )
            self.parse_response_data(response)

    def parse_response_data(self, response):
        """Download every image listed in one page response.

        Args:
            response: the page response; must provide ``json()``,
                ``status_code`` and ``text`` like ``requests.Response``.

        Returns:
            The number of images saved from this page. ``0`` is returned, after
            printing a diagnostic, when the body is not JSON or does not
            contain ``albumdata.linkData``.
        """
        try:
            payload = response.json()
        except ValueError as err:
            # Every JSON decoding error raised by response.json() is a
            # ValueError subclass. Show the status and the start of the body
            # so the user can see what the server actually sent.
            print(f'响应不是合法的JSON(HTTP状态码 {response.status_code}):{err}')
            print(f'响应内容开头:{response.text[:200]!r}')
            return 0

        try:
            link_items = payload['albumdata']['linkData']
        except (KeyError, TypeError):
            print('响应JSON中没有 albumdata.linkData,跳过该页')
            return 0
        if not link_items:
            return 0

        saved = 0
        # Each entry carries the thumbnail address and an id used in the name.
        for item in link_items:
            pic_url = item['thumbnailUrl']
            pic_id = item['contSign']
            picture = requests.get(pic_url, timeout=self.request_timeout)
            if not picture.ok:
                # Do not store an error page under a .jpg name.
                print(f'{pic_id}---------下载失败(HTTP状态码 {picture.status_code})')
                continue
            self.parse_save_data(picture.content, pic_id)
            saved += 1
        return saved

    def parse_save_data(self, data, pic_id):
        """Write one image to ``<os_path><input_name><pic_id>.jpg``.

        Args:
            data: raw image bytes.
            pic_id: identifier appended to the album name in the file name.
        """
        file_path = f'{self.os_path}{self.input_name}{pic_id}.jpg'
        with open(file_path, 'wb') as f:
            f.write(data)
        print(f'{pic_id}---------保存已完成!!!')
# Script entry point: build the spider (which prompts for the album name and
# page count) and start crawling.
if __name__ == '__main__':
    BaiduSpider().parse_requests_url()
运行结果:
JSONDecodeError: Expecting value: line 1 column 1 (char 0)