python学习使用库和函数和制作库。不是很懂,
库部分代码如下:
import requests
from lxml import etree
import os
import time
async def coffee_dou_requests(u,i,p,j):
a = float(0)
for c in ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19',
'20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33']:
url = u
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36'
}
b = '.jpg'
response = requests.get(url=url, headers=headers)
page_text = response.text
tree = etree.HTML(page_text)
li_list = tree.xpath(i)
if not os.path.exists(f'./{p}'):
os.mkdir(f'./{p}')
for li in li_list:
a = a + 1
img_src = li.xpath(j)[0]
img_name = (f'{a}+{b}')
print(f"'{img_src}'")
# img_name = img_name.encode('iso-8859-1').decode('gbk')
img_data = requests.get(url=img_src, headers=headers).content
img_path = f'{p}/' + img_name
with open(img_path, 'wb') as fp:
fp.write(img_data)
print(img_name, img_src, '爬取完毕!!')
return a
#u为url
#i为大体位置
#p为文件名称
#j为具体位置
程序代码如下:
from coffee_dou_requests import coffee_dou_requests
import asyncio
#u=input("请输入url或url格式:")
#i=input("请输入宏观位置(类型为xpath):")
#p=input("请输入文件名称:")
#j=input("请输入爬取内容具体位置(类型为xpath):")
u='https://www.xmqmnet.com/meinvtupian/' + 'index-' + '2' + '.html'
i='//div[@class="main"]/div[2]/ul/li'
p="图片"
j='./div/a/img/@src'
tasks = []
b=coffee_dou_requests(u,i,p,j)
报错为:sys:1: RuntimeWarning: coroutine 'coffee_dou_requests' was never awaited
想达到的效果为:导入coffee_dou_requests的这个包后,输入对应值,函数自动爬取对应图片