import urllib.request
from lxml import etree
def create_request(page):
    """Build the HTTP request for one listing page of the picture gallery.

    Page 1 is served from ``shanshuitupian.html``; for any other page the
    page number is appended to the file name (``shanshuitupian_<page>.html``).

    Args:
        page: Listing page number.

    Returns:
        A ``urllib.request.Request`` for the page URL that carries the
        User-Agent and Cookie headers defined in this function.
    """
    # Headers copied from a browser session and sent with every page request.
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36 Edg/123.0.0.0',
        'Cookie': 'cz_statistics_visitor = 0feda378 - 4c8f - a225 - 7a91 - 755c228c51fb;_clck = 1jn4eo3 % 7C2 % 7Cfkx % 7C0 % 7C1565;__gads = ID = b9ca106bba83c1c6:T = 1713111545:RT = 1713111545:S = ALNI_MYXYNBp9CApMOsDFxHnXQoocoLPXQ;__gpi = UID = 00000debfd2f75d9: T = 1713111545:RT = 1713111545:S = ALNI_MaWelFfZUAHAN3ee1wOLzGOFB05KA;__eoi = ID = bd66e9b7043bae01:T = 1713111545:RT = 1713111545:S = AA - Afjak04XLL - 3r2QGac86IgZ5W;_clsk = xn6u0n % 7C1713112855925 % 7C3 % 7C1 % 7Ch.clarity.ms % 2Fcollect;Hm_lvt_398913ed58c9e7dfe9695953fb7b6799 = 1713111487, 1713115404, 1713154098;Hm_lpvt_398913ed58c9e7dfe9695953fb7b6799 = 1713154098',
    }

    # The first page has no numeric suffix in its file name.
    target = (
        'https://sc.chinaz.com/tupian/shanshuitupian.html'
        if page == 1
        else 'https://sc.chinaz.com/tupian/shanshuitupian_' + str(page) + '.html'
    )
    return urllib.request.Request(url=target, headers=browser_headers)
def get_content(request):
    """Open *request* and return the response body decoded as UTF-8.

    Args:
        request: The ``urllib.request.Request`` (or URL string) to open.

    Returns:
        The full response body as ``str``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # Use the response as a context manager so the underlying connection is
    # closed as soon as the body has been read, even if decoding fails.
    with urllib.request.urlopen(request) as response:
        return response.read().decode('utf-8')
def down_load(content):
    """Download every gallery image referenced in a listing page.

    For each ``<img>`` inside a gallery item, the ``alt`` text becomes the
    file name and the ``data-original`` attribute supplies the image URL.
    Files are written as ``<alt>.jpg`` relative to the current working
    directory.

    Args:
        content: HTML text of one listing page.

    Returns:
        None.

    Raises:
        urllib.error.URLError: If an image cannot be retrieved.
    """
    tree = etree.HTML(content)

    # Match any <div> whose class list contains the token "item" rather than
    # requiring the exact value "item masonry-brick". NOTE(review):
    # "masonry-brick" looks like a class added in the browser by the Masonry
    # layout script, so the HTML fetched here presumably only has
    # class="item" -- confirm against the raw page source. The token test
    # matches both forms.
    images = tree.xpath(
        '//div[contains(concat(" ", normalize-space(@class), " "), " item ")]//img'
    )

    # Characters that are not allowed in file names on common file systems.
    unsafe_chars = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})

    saved = 0
    for img in images:
        # Read both attributes from the same element so a name can never be
        # paired with another image's URL.
        name = (img.get('alt') or '').strip()
        pic = (img.get('data-original') or '').strip()
        if not name or not pic:
            # Without both a name and a source there is nothing to save.
            continue

        # data-original is normally protocol-relative ("//host/path"); only
        # prepend the scheme when the URL does not already carry one.
        url = pic if pic.startswith(('http://', 'https://')) else 'https:' + pic
        urllib.request.urlretrieve(url=url, filename=name.translate(unsafe_chars) + '.jpg')
        saved += 1

    if saved == 0:
        # Make the "nothing matched" case visible instead of silently
        # finishing with no files written.
        print('down_load: no downloadable images found in this page; '
              'check the XPath against the raw page source.')
if __name__ == '__main__':
    # Read the inclusive page range from the user, then handle each page in
    # order: build the request, fetch the HTML, download its images.
    first_page = int(input('请输入起始页码'))
    last_page = int(input('请输入结束页码'))
    page_numbers = range(first_page, last_page + 1)
    for page_no in page_numbers:
        down_load(get_content(create_request(page_no)))
# 问题:程序运行时不报错,但是无法下载图片。(Problem: the script runs without errors, but no images are downloaded.)