
def xiaohongshu():
    """Scrape note titles and links from the Xiaohongshu explore feed.

    Reads the ``page`` query parameter from the current request and uses it
    as the number of scroll steps to perform. Cards are read one by one via
    their positional XPath; whenever the next card cannot be found, the page
    is scrolled down by 900 pixels and one "page" is consumed.

    Returns:
        A list of ``{'title': ..., 'path': ...}`` dicts, de-duplicated and in
        the order the cards were encountered. ``path`` is an absolute URL.
    """
    # Local imports keep this block self-contained without touching the
    # file-level import section.
    from urllib.parse import urljoin

    from selenium.common.exceptions import (
        NoSuchElementException,
        StaleElementReferenceException,
    )

    base_url = 'https://www.xiaohongshu.com'
    url = 'https://www.xiaohongshu.com/explore'
    card_xpath = '/html/body/div[1]/div[1]/div[2]/div[2]/div/div[2]/section[{n}]'

    # Parse the request argument BEFORE launching the browser so that a
    # missing or malformed value cannot leave an orphaned Chrome process.
    try:
        page = int(request.args.get('page', ''))
    except (TypeError, ValueError):
        page = 1  # fall back to a single scroll step

    options = webdriver.ChromeOptions()
    options.add_argument('--headless')  # run without a visible browser window
    # The options object must be handed to the driver, otherwise the
    # --headless flag above is silently ignored.
    driver = webdriver.Chrome(options=options)
    try:
        # Hide the navigator.webdriver flag on every new document. Patching
        # the initial blank page directly is pointless because the property
        # is reset on navigation; the CDP hook covers the real page.
        driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": """
                Object.defineProperty(navigator, 'webdriver', {
                  get: () => false
                })
            """
        })
        driver.get(url)
        time.sleep(15)  # give the feed time to render

        n = 1  # 1-based index of the next card (section) to read
        results = []
        seen = set()  # (title, path) pairs already collected
        while page > 0:
            card = card_xpath.format(n=n)
            try:
                title = driver.find_element(
                    By.XPATH, card + '/div/div/a/span'
                ).text
                # Selenium can only locate element nodes, so select the <a>
                # element and read its href instead of using ".../@href".
                link = driver.find_element(By.XPATH, card + '/div/a[2]')
                href = link.get_attribute('href') or ''
            except (NoSuchElementException, StaleElementReferenceException):
                # Card n is not available yet: scroll down 900 pixels so more
                # cards load, then retry the same index.
                driver.execute_script("window.scrollBy(0, 900);")
                time.sleep(2)
                page -= 1
                continue

            # get_attribute('href') usually returns an absolute URL already;
            # urljoin handles both absolute and site-relative values.
            path = urljoin(base_url, href)
            print(title, path)
            n += 1

            key = (title, path)
            if key not in seen:
                seen.add(key)
                results.append({'title': title, 'path': path})

        print(results)
        return results
    finally:
        # Always release the browser, even if scraping fails midway.
        driver.quit()
