# -*- coding:utf-8 -*-
from selenium.webdriver.common.by import By
import re
import lxml.html
import matplotlib.pyplot as plt
from selenium import webdriver
import pandas as pd
import time
from selenium import webdriver
# Scrape a live-room listing from the Huya site and print it as a table.
# Flow: open the home page, click the navigation link, click the category
# entry whose data-gid is "1", then parse the page that ends up in the most
# recently opened browser window.

# NOTE(review): the original call was driver.get('/'), which is not a URL a
# browser can load. The original comment names the Huya official site, so the
# home page address is assumed here -- confirm before relying on it.
HUYA_URL = 'https://www.huya.com/'

# Fixed pause (seconds) after each navigation step so the page can render.
PAGE_SETTLE_SECONDS = 2

driver = webdriver.Chrome()  # create the driver object
try:
    driver.get(HUYA_URL)  # open the Huya official site
    time.sleep(PAGE_SETTLE_SECONDS)

    a = driver.find_element(
        By.XPATH, '//a[@class="hy-nav-link hy-nav-link-ext clickstat"]')
    a.click()
    time.sleep(PAGE_SETTLE_SECONDS)

    b = driver.find_element(By.XPATH, '//li[@data-gid="1"]')
    b.click()
    time.sleep(PAGE_SETTLE_SECONDS)

    # The click may open a new window/tab; read from the last handle.
    driver.switch_to.window(driver.window_handles[-1])
    html = driver.page_source
finally:
    # Always end the browser session, even when an element lookup fails,
    # so no Chrome/chromedriver process is left running.
    driver.quit()

xp = lxml.html.fromstring(html)  # parsed lxml document
units = xp.xpath('//li[@class="game-live-item"]')

# Collect one dict per list item and build the DataFrame once, instead of
# concatenating a one-row frame onto a growing frame on every iteration.
rows = []
for u in units:
    rows.append({
        '标题': u.xpath('./a/@title')[0],
        # Original note: "no text()" -- string() is used to take the
        # element's whole text content rather than a text() child.
        '主播': u.xpath('./span/span/i[@class="nick"]')[0].xpath("string()"),
        '人气': u.xpath('./span/span[2]/i[@class="js-num"]/text()')[0],
    })

df = pd.DataFrame(rows)
print(df)
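# NOTE: the original code was meant to log in to the Huya website via selenium, but it is not yet known how to get past the slider CAPTCHA.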