def get_Html(url):
    """Load ``url`` in Chrome and return its HTML parsed with ``etree.HTML``.

    Steps, in order:

    1. Open ``url`` in a Chrome instance started with the
       ``AutomationControlled`` blink feature disabled and the
       ``enable-automation`` switch excluded.
    2. Best-effort: press the slider handle (element id ``nc_1_n1z``) and
       drag it to the right in 60px steps, then release it.
    3. Reload the current URL, scroll the ``.footer`` element into view and
       parse the resulting page source.

    Args:
        url: Address of the page to load.

    Returns:
        The value of ``etree.HTML(driver.page_source)`` taken after the
        reload.

    Raises:
        WebDriverException: If the reload or the ``.footer`` lookup fails.
            WebDriver errors during step 1 and step 2 are ignored.
    """
    # Function-scope import keeps this block self-contained.
    from selenium.common.exceptions import WebDriverException

    chrome_options = ChromeOptions()
    chrome_options.add_argument("--disable-blink-features=AutomationControlled")
    chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
    driver = webdriver.Chrome(options=chrome_options)
    try:
        try:
            driver.get(url)
            driver.maximize_window()
            time.sleep(1)
            slider = driver.find_element_by_id('nc_1_n1z')
            # A fresh ActionChains per step: some Selenium releases keep the
            # queued actions after perform(), so reusing one chain would
            # replay every earlier step on each subsequent perform().
            ActionChains(driver).click_and_hold(slider).perform()
            distance = 300
            step = 60
            # Same iteration count as the original `while i <= distance`
            # loop: six steps, i.e. 360px in total.
            # NOTE(review): this overshoots `distance`; presumably the page
            # clamps the handle at the end of the track - confirm.
            for _ in range(0, distance + 1, step):
                ActionChains(driver).move_by_offset(
                    xoffset=step, yoffset=0
                ).pause(0.1).perform()
            ActionChains(driver).release().perform()
        except WebDriverException:
            # The slider step is best-effort (e.g. the captcha element may be
            # absent); continue to the reload either way. Non-WebDriver
            # errors are no longer swallowed.
            pass

        driver.get(driver.current_url)
        time.sleep(1)
        target = driver.find_element_by_css_selector(".footer")
        driver.execute_script("arguments[0].scrollIntoView();", target)
        page_source = driver.page_source
        return etree.HTML(page_source)
    finally:
        # Always shut the browser down; otherwise each call leaks a Chrome
        # process together with its chromedriver.
        driver.quit()
滑块验证的次数多了之后就会验证不通过,但人工拖动则没有问题。在浏览器控制台查看 window.navigator.webdriver,显示的是 false。在网上搜索后发现,可能是 Chrome 浏览器驱动文件(对 Windows 而言就是对应版本的 chromedriver.exe)中的【特征字符串】被网站检测到,从而判断出是爬虫所为。
但不确定是否还有其他原因,想请教一下各位同学。