# No changes are needed: copy this code and run it as-is.
# -*- coding: UTF-8 -*-
"""
@作 者:陆地起飞全靠浪
"""
import requests
from bs4 import BeautifulSoup
# Request headers exactly as copied from the browser's developer tools,
# one "name: value" pair per line. The entries whose names start with ':'
# (:authority, :method, :path, :scheme) are part of what was copied too.
requests_headers = """
:authority: www.tupianzj.com
:method: GET
:path: /mingxingku/neidinv/list_229_1.html
:scheme: https
accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9
accept-encoding: gzip, deflate, br
accept-language: zh-CN,zh;q=0.9
cache-control: max-age=0
cookie: cookiecookiecookiecookiecookiecookiecookie
if-modified-since: Fri, 25 Feb 2022 17:32:19 GMT
if-none-match: W/"621912a3-91ee"
referer: https://www.tupianzj.com/mingxingku/neidinv/list_229_1.html
sec-fetch-dest: document
sec-fetch-mode: navigate
sec-fetch-site: same-origin
sec-fetch-user: ?1
upgrade-insecure-requests: 1
user-agent: Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36
"""

# Parse the pasted text into a {name: value} mapping.
params = {}
for raw_line in requests_headers.split('\n'):
    line = raw_line.strip()
    if not line:
        continue
    # Split on the FIRST ': ' only, so a value that itself contains ': '
    # is kept whole instead of breaking the two-way unpacking.
    name, separator, value = line.partition(': ')
    if not separator:
        # Not a "name: value" line; skip it rather than raise.
        continue
    params[name] = value
# Fetch one listing page and locate the two containers that hold its entries.
#
# `params` holds the browser request headers parsed earlier in this file.
# They are HTTP headers, not query-string arguments, so they are merged into
# `headers` instead of being passed as `params=` (which would append every one
# of them to the URL).
_SKIPPED_HEADERS = {
    # Let requests advertise only the encodings it can actually decode
    # ('br' needs an optional brotli package to be installed).
    'accept-encoding',
    # Conditional headers may be answered with "304 Not Modified", which
    # carries no body to parse.
    'if-modified-since',
    'if-none-match',
    # Set explicitly below.
    'user-agent',
}
headers = {
    name: value
    for name, value in params.items()
    # Names starting with ':' are HTTP/2 pseudo-headers; they are not valid
    # header names for requests and would be rejected.
    if not name.startswith(':') and name.lower() not in _SKIPPED_HEADERS
}
headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'

url = 'https://www.tupianzj.com/mingxingku/neidinv/list_229_1.html'
# A timeout keeps the script from hanging forever on an unresponsive server.
r = requests.get(url=url, headers=headers, timeout=10)
r.encoding = 'UTF-8'
code = r.status_code
print(code)
text = r.text
soup = BeautifulSoup(text, 'html.parser')
ul = soup.find('ul', class_='list_con_mxbox_ul')
div = soup.find('div', class_='list_con_box')
print(ul)
# find() returns None when the element is absent (e.g. an error page or a
# changed layout); fall back to an empty list instead of raising
# AttributeError on `.children`.
li_list = ul.children if ul is not None else []
div_list = div.children if div is not None else []