最近想爬取一个日文网站的搜索结果,
https://shop.keionet.com/search/index.html#reorder
不知道为什么抓取不到数据,显示的都是0条数据
python来抓取
请指教一下,谢谢
from bs4 import BeautifulSoup
import requests
def main():
#
url_items = "https://shop.keionet.com/search/index.html"
# 获取加密后的数据
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36",
# "Cookie": "D94FECC2CEBF0B0993D4F1A703477984.koec-a; AUI=1b82b14046cbc43a56750512ada793b81ebd06f45311c1b8f29390425017beee;",
"Cookie": "JSESSIONID=653F73FC30ACAE0F025439D2572B08AB.koec-a;",
# "Cookie": "JSESSIONID=4276D482EBB4B4573F8E7134CD5C1408.koec-a",
# "Cookie": cookies_str,
}
session = requests.session()
# session.get(url=url_items,headers=headers)
# print("直接获取cookie:", session.cookies, type(session.cookies))
# print(session.cookies.get("JSESSIONID"))
#
# cookies_str="JSESSIONID="+session.cookies.get("JSESSIONID")
# print(cookies_str)
# #
# # session = requests.session()
# # session.get(url_items)
# html_set_cookie = requests.utils.dict_from_cookiejar(session.cookies)
# print(html_set_cookie)
# requests.getでHTMLを取得
item_data = {
"indexForm:keyword": "AA",
"indexForm/view/front/search/index.html": "indexForm",
}
r_post = session.post(url_items, headers=headers, data=item_data)
# print(r_post.text)
# print(r_post.cookies) # 看cookie
soup = BeautifulSoup(r_post.content, "html.parser")
# print(soup)
wc = soup.find(class_="itemGroupThumbs")
print(wc)
#
# print("直接获取cookie:", session.cookies, type(session.cookies))
# print(session.cookies.get("JSESSIONID"))
# cookies_str = "JSESSIONID=" + session.cookies.get("JSESSIONID")
# print(cookies_str)
# wc_name=wc.find(class_="name")
# print(wc_name)
# wc_price = wc.find(class_="retail_price")
# print(wc_price)
# wc_icon = wc.find(class_="icon")
# print(wc_icon)
# ws = [i.strip() for i in wc.text.splitlines()]
# print(ws)
# print(r_post.text)
# return (r_post)
# col1 = r_post.get('AuthenticationResult')
# col2 = col1.get('IdToken')
# print(col2)
if __name__ == "__main__":
main()