import requests
from bs4 import BeautifulSoup
import csv
# Browser-like User-Agent string sent with every request made by get_html().
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'

# HTTP request headers shared by all page fetches.
headers = {'User-Agent': _USER_AGENT}
def get_html(url, timeout=10):
    """Fetch *url* and return the response body decoded as UTF-8.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        The page content as text.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status, so
            that an error page is never handed to the parser as if it were
            a real listing page.
        requests.RequestException: On connection problems or timeout.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()
    # Force UTF-8 instead of relying on the encoding requests guesses.
    response.encoding = 'utf-8'
    return response.text
def parse_html(html):
    """Extract image names and links from the HTML of one listing page.

    Args:
        html: HTML text of a listing page.

    Returns:
        A list of dicts, one per matched anchor, with the keys '图片名'
        (the anchor's ``alt`` attribute) and '图片链接' (the anchor's
        ``href``, with ``https:`` prepended when the link is
        protocol-relative). The list is empty when nothing matches.
    """
    soup = BeautifulSoup(html, 'lxml')
    # Select the anchors directly instead of iterating over the container
    # tag: iterating a tag also yields the text nodes between elements, on
    # which ``find('a', target=...)`` fails, and the attribute value to match
    # is "_blank", not "blank".
    # NOTE(review): the selector assumes the anchors sit two <div> levels
    # below #container - confirm against the live page markup.
    anchors = soup.select('#container>div>div>a[target="_blank"]')
    lst = []
    for a in anchors:
        lj = a.get('href')
        if not lj:
            # An anchor without a link has nothing worth recording.
            continue
        if lj.startswith('//'):
            # Protocol-relative link: make it a complete URL.
            lj = 'https:' + lj
        lst.append({'图片名': a.get('alt'), '图片链接': lj})
    return lst
def save_data(lst):
    """Write the collected rows to ``tupian.csv`` in the working directory.

    Args:
        lst: Iterable of dicts keyed by '图片名' and '图片链接'.

    The file is overwritten on each call, encoded as UTF-8, and starts with
    a header row.
    """
    columns = ['图片名', '图片链接']
    # newline='' lets the csv module control line endings itself.
    with open('tupian.csv', 'w', encoding='utf-8', newline='') as out:
        sheet = csv.DictWriter(out, fieldnames=columns)
        sheet.writeheader()
        for row in lst:
            sheet.writerow(row)
# Crawl listing pages 1-10 and gather every image record into one list.
moxie_content = []
for i in range(1, 11):
    # The first page has no numeric suffix; later pages are named
    # index_<n>.html (with an underscore before the page number).
    if i == 1:
        url = 'https://sc.chinaz.com/tupian/index.html'
    else:
        url = f'https://sc.chinaz.com/tupian/index_{i}.html'
    html = get_html(url)
    # extend() appends in place instead of rebuilding the list every page.
    moxie_content.extend(parse_html(html))
# Write the CSV once, after all pages have been collected.
save_data(moxie_content)
求各位帮我看看这段代码应该怎么改。
- 写回答
- 好问题 0 提建议
- 追加酬金
- 关注问题
- 邀请回答
-
1条回答 默认 最新
- 天际的海浪 2022-05-31 01:40关注
你题目的解答代码如下:
import requests
from bs4 import BeautifulSoup
import csv

# Browser-like request headers sent with every page fetch.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'
}


def get_html(url):
    """Download *url* and return its body decoded as UTF-8 text."""
    html = requests.get(url, headers=headers)
    html.encoding = 'utf-8'
    return html.text


def parse_html(html):
    """Return a list of {'图片名', '图片链接'} dicts for one listing page."""
    html_url = BeautifulSoup(html, 'lxml')
    # Pick the anchors directly with a CSS selector; the target attribute
    # value to match is "_blank".
    conMidtab1 = html_url.select('#container>div>div>a[target="_blank"]')
    lst = []
    for a in conMidtab1:
        dic = {}
        lj = a.get('href')
        mz = a.get('alt')
        dic['图片名'] = mz
        # Prefix the scheme onto the href to form a complete URL.
        dic['图片链接'] = "https:"+lj
        lst.append(dic)
        print(dic)
    return lst


def save_data(lst):
    """Write the rows to tupian.csv (UTF-8) with a header row."""
    with open('tupian.csv', 'w', encoding='utf-8', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=['图片名', '图片链接'])
        writer.writeheader()
        writer.writerows(lst)


# Crawl listing pages 1-10; page 1 has no numeric suffix, later pages are
# named index_<n>.html.
moxie_content = []
for i in range(1,11):
    if i == 1:
        url = 'https://sc.chinaz.com/tupian/index.html'
    else:
        url = f'https://sc.chinaz.com/tupian/index_{i}.html'
    html = get_html(url)
    moxie_content = moxie_content + parse_html(html)
save_data(moxie_content)
如有帮助,请点击我的回答下方的【采纳该答案】按钮帮忙采纳下,谢谢!
本回答被题主选为最佳回答,对您是否有帮助呢? 解决 1 / 无用
悬赏问题
- ¥15 Python中的request,如何使用ssr节点,通过代理requests网页。本人在泰国,需要用大陆ip才能玩网页游戏,合法合规。
- ¥100 为什么这个恒流源电路不能恒流?
- ¥15 有偿求跨组件数据流路径图
- ¥15 写一个方法checkPerson,入参实体类Person,出参布尔值
- ¥15 我想咨询一下路面纹理三维点云数据处理的一些问题,上传的坐标文件里是怎么对无序点进行编号的,以及xy坐标在处理的时候是进行整体模型分片处理的吗
- ¥15 CSAPPattacklab
- ¥15 一直显示正在等待HID—ISP
- ¥15 Python turtle 画图
- ¥15 stm32开发clion时遇到的编译问题
- ¥15 lna设计 源简并电感型共源放大器