这是我按照学习教程写的爬取斗鱼主播界面的简单代码,代码如下:
from selenium import webdriver
from selenium.webdriver.common.by import By
class Douyu():
def __init__(self):
self.url='https://www.douyu.com/directory/all'
self.driver=webdriver.Chrome()
def parse_data(self):
self.driver.implicitly_wait(2)
room_list=self.driver.find_elements(By.XPATH,'//*[@id="listAll"]/section[2]/div[2]/ul/li/div')
#遍历房间列表,从每一个房间节点中获取数据
for room in room_list:
try:
haha = {}
haha['title'] = room.find_element(By.XPATH, './a/div[2]/div[1]/h3').text
haha['type'] = room.find_element(By.XPATH, './a/div[2]/div[1]/span').text
haha['owner'] = room.find_element(By.XPATH, './a/div[2]/div[2]/h2/div').text
haha['num'] = room.find_element(By.XPATH, './a/div[2]/div[2]/span').text
haha['src'] = room.find_element(By.XPATH, './a/div[1]/div[1]/picture/img').get_attribute('src')
print(haha)
except:
pass
# def save_data(self,data_list):
# for data in data_list:
# print(data)
def run(self):
#url
#driver
#get
self.driver.get(self.url)
self.parse_data()
if __name__ == '__main__':
douyu=Douyu()
douyu.run()
可是爬取出来的却是两条,有时候一条,这是怎么回事?
{'title': 'AG 0:0 狼队', 'type': '王者荣耀', 'owner': '王者荣耀官方赛事', 'num': '708.8万', 'src': 'https://rpic.douyucdn.cn/live-cover/coverupdate/2023/08/12/e112a2e164dd4657c434a4b9bb0a5f80.jpg/dy1'}
{'title': '七圣召唤主播资格赛决赛进行中!', 'type': '原神', 'owner': '丘丘人大队队长', 'num': '69.3万', 'src': 'https://rpic.douyucdn.cn/live-cover/coverupdate/2023/08/08/8c40ce9a0f97a89813f94a38ec1324f8.jpg/dy1'}
当我去掉try时,报如下错误:
selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: stale element not found
(Session info: chrome=115.0.5790.171); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#stale-element-reference-exception
Stacktrace:
望解决,谢谢!