Zoeyzengli
Zoeyzengli
2019-04-02 14:00

selenium爬取招聘网站一直刷新主页,代码为什么爬不出来,超时怎么解决?

  • python
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
from selenium.common.exceptions import TimeoutException
from pyquery import PyQuery as pq

# Launch one Firefox session when the module is loaded; it is shared by the functions below.
browser = webdriver.Firefox()
# Shared explicit wait bound to that session, constructed with a 10-second timeout.
wait = WebDriverWait(browser, 10)

def search(max_attempts=3):
    """Open the SimplyHired home page, run the keyword search and print page one.

    Each attempt loads the home page, waits for the search box and the submit
    button, submits the query "Information Management" and then calls
    ``get_products()`` to print the first results page.

    The original version retried by calling itself without any limit, so a
    selector that never matched caused the home page to be reloaded forever
    (until ``RecursionError``). Retries are now bounded.

    Args:
        max_attempts: Total number of attempts (page loads) before giving up.
            Must be at least 1.

    Raises:
        ValueError: If ``max_attempts`` is smaller than 1.
        TimeoutException: If every attempt timed out.
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1")

    last_error = None
    for _attempt in range(max_attempts):
        try:
            browser.get('https://www.simplyhired.com/')  # site URL
            # No implicitly_wait() here: the Selenium documentation warns that
            # mixing implicit and explicit waits gives unpredictable wait times.
            # NOTE(review): ".id_f_keywords" is a *class* selector. If the page
            # actually uses id="id_f_keywords", this never matches and should be
            # "#id_f_keywords" -- confirm against the live DOM.
            keyword_box = wait.until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, "input.id_f_keywords:nth-child(1)")  # search box
                )
            )
            submit = wait.until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, '.btn-block'))  # search button
            )
            keyword_box.send_keys("Information Management")  # type the query
            submit.click()
            get_products()
            return
        except TimeoutException as exc:
            # Remember the failure and reload the page on the next iteration.
            last_error = exc

    raise TimeoutException(
        "search() timed out on all %d attempts; check the CSS selectors" % max_attempts
    ) from last_error

def next_page():
    """Click the "next page" pagination link.

    Waits until the ``a.next-pagination`` element is clickable and clicks it.
    If the wait (or the click) raises ``TimeoutException``, the function falls
    back to calling ``search()`` and returns its result.
    """
    next_link_locator = (By.CSS_SELECTOR, 'a.next-pagination')  # "next page" button
    try:
        wait.until(EC.element_to_be_clickable(next_link_locator)).click()
    except TimeoutException:
        # NOTE(review): this restarts the whole search from the home page rather
        # than retrying the pagination click -- confirm that is intended.
        return search()


def get_products():
    """Wait for the job list to be present, then print the page's HTML source.

    Blocks on the shared explicit wait until an element matching
    ``#content .jobs`` exists in the DOM, then writes ``browser.page_source``
    to standard output.
    """
    jobs_locator = (By.CSS_SELECTOR, '#content .jobs')  # job container under #content
    wait.until(EC.presence_of_element_located(jobs_locator))
    print(browser.page_source)  # dump the full HTML of the current page


def mian(total_pages=19):
    """Run the crawl: search, then walk through the following result pages.

    Page 1 is printed by ``search()``. For every further page the function
    pauses three seconds, clicks through with ``next_page()`` and prints the
    page with ``get_products()``. The original loop never called
    ``next_page()``, so it printed the same page on every iteration.

    The browser is always closed at the end, even when the crawl fails, so no
    Firefox/driver process is left running.

    Args:
        total_pages: Number of result pages to print, including the first one.
            The default of 19 matches the original hard-coded ``range(2, 20)``.
    """
    try:
        print("第", 1, "页:")
        search()
        for page_number in range(2, total_pages + 1):
            time.sleep(3)  # short pause between page loads
            print("第", page_number, "页:")
            next_page()  # actually advance to the next results page
            get_products()
    finally:
        # Release the browser session regardless of how the crawl ended.
        browser.quit()


# Start the crawl only when this file is executed as a script.
if __name__ == '__main__':
    mian()

拜托哪位大神能教教我呀,小白一个

  • 点赞
  • 回答
  • 收藏
  • 复制链接分享

0条回答