hxmhh 2023-05-31 21:26 采纳率: 0%
浏览 30

Python抓取不到搜索结果,急

最近想爬取一个日文网站的搜索结果,
https://shop.keionet.com/search/index.html#reorder
不知道为什么抓取不到数据,显示的都是0条数据

我是用 Python(requests + BeautifulSoup)来抓取的,代码如下。
请指教一下,谢谢。


from bs4 import BeautifulSoup
import requests

def main():
    """Submit a keyword search to shop.keionet.com and print the result node.

    The search page is first loaded with a GET so that the server can issue
    its own session cookie, then the search form is POSTed on the same
    session. The response HTML is parsed and the first element whose class
    is ``itemGroupThumbs`` is printed (``None`` is printed when the page
    contains no such element).

    Raises:
        requests.HTTPError: if either request returns a 4xx/5xx status.
        requests.RequestException: on connection failures or timeouts.
    """
    url_items = "https://shop.keionet.com/search/index.html"
    timeout_seconds = 30

    # Only a browser-like User-Agent is sent. A JSESSIONID copied from a
    # browser is deliberately NOT hard-coded here: it belongs to that browser
    # session and goes stale, and an explicit "Cookie" header would be sent
    # instead of the cookies the session collects by itself.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36",
    }

    # Form fields submitted by the search form.
    # NOTE(review): if the result list is still empty, compare these fields
    # with the request the browser sends (developer tools, Network tab) --
    # the form may carry additional hidden fields.
    item_data = {
        "indexForm:keyword": "AA",
        "indexForm/view/front/search/index.html": "indexForm",
    }

    # The context manager closes the pooled connections when done.
    with requests.Session() as session:
        session.headers.update(headers)

        # Load the page once so the session's cookie jar receives whatever
        # cookies the server sets before the form is submitted.
        r_get = session.get(url_items, timeout=timeout_seconds)
        r_get.raise_for_status()

        r_post = session.post(url_items, data=item_data, timeout=timeout_seconds)
        # Fail loudly on an error status instead of parsing an error page.
        r_post.raise_for_status()

    soup = BeautifulSoup(r_post.content, "html.parser")
    # find() returns None when no element has this class.
    wc = soup.find(class_="itemGroupThumbs")
    print(wc)

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

  • 写回答

2条回答 默认 最新

  • 于扶摇 2023-05-31 22:40
    关注

    您可能没有正确地解析搜索结果。如果这是问题所在,您需要仔细查看网页源代码,并确定正确的搜索结果格式。

    评论

报告相同问题?

问题事件

  • 创建了问题 5月31日