import requests from bs4 import BeautifulSoup url = 'https://699pic.com/qingnianshenghuo.html' resp = requests.get(url) resp.encoding='utf-8' main_page= BeautifulSoup(resp.text, 'html.parser') alist = main_page.find_all("div", class_="photo-tag") child_href_list=[] for a in alist: w=a.find("a") hrefs = "https:"+w.get("href") child_href_list.append(hrefs) for href in child_href_list: child_page_resp = requests.get(href) child_page_resp.encoding="utf_8" child_page_text = child_page_resp.text child_page=BeautifulSoup(child_page_text,"html.parser") p = child_page.find("a", class_="photo-img-link") img = p.find("img") print("https:"+img.get("src"))
主要是抓取的图片重复的太厉害,尤其是第一张图片,没有规律的循环重复……