# 直接上代码
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By #按照什么方式查找,By.ID,By.CSS_SELECTOR
from selenium.webdriver.common.keys import Keys #键盘按键操作
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait #等待页面加载某些元素
import time
import pymysql
def get_goods(driver, max_pages=None):
    """Scrape the current result page and every following page into the database.

    For each element with class ``gl-item`` the product name, price, comment
    text and detail URL are read, printed, and inserted into the ``product``
    table. After a page is processed the link whose text contains ``下一页``
    is clicked and the next page is handled the same way, until no such link
    is found.

    Args:
        driver: A Selenium WebDriver already showing a search-result page.
        max_pages: Optional upper bound on the number of pages to process.
            ``None`` (the default) means "until there is no next-page link".

    Returns:
        None. Errors are reported with ``print`` instead of being raised, so
        callers keep the original never-raises contract.
    """
    # Bound parameters instead of string concatenation: scraped product names
    # routinely contain quotes, and the previous hand-built statement was also
    # syntactically invalid (empty column list plus a trailing comma).
    # NOTE(review): positional insert -- assumes `product` has exactly four
    # columns in the order name, price, comment text, url. Confirm against the
    # schema and add an explicit column list.
    insert_sql = 'insert into product values (%s, %s, %s, %s)'

    con = None
    cursor = None
    try:
        # One connection for the whole crawl instead of one (leaked) per item.
        con = connect()
        cursor = con.cursor()

        page = 0
        # Iterative pagination: the recursive version grew the call stack by
        # one frame per result page.
        while max_pages is None or page < max_pages:
            page += 1

            for good in driver.find_elements(By.CLASS_NAME, 'gl-item'):
                try:
                    p_name, price, p_commit, detail_url = _read_good(good)
                except Exception as exc:
                    # Best effort: an item missing one of the expected child
                    # elements should not abort the rest of the crawl.
                    print('get_goods: skipping item: %r' % (exc,))
                    continue

                msg = '\n商品 : %s\n链接 : %s\n价钱 :%s\n评论 :%s\n' % (
                    p_name, detail_url, price, p_commit)
                print(msg, end='\n\n')
                cursor.execute(insert_sql, (p_name, price, p_commit, detail_url))

            # Commit once per page so a later failure keeps finished pages.
            con.commit()

            # find_elements returns an empty list (no exception) when the
            # link is absent, which is the normal end-of-results condition.
            next_links = driver.find_elements(By.PARTIAL_LINK_TEXT, '下一页')
            if not next_links:
                break
            next_link = next_links[0]
            # Presumably the last page keeps the link but marks it with a
            # "disabled" class -- verify against the live markup.
            if 'disabled' in (next_link.get_attribute('class') or ''):
                break
            next_link.click()
            time.sleep(1)  # give the next page a moment to start rendering
    except Exception as exc:
        # Top-level boundary of the crawl: report instead of silently passing.
        print('get_goods: aborted: %r' % (exc,))
    finally:
        if cursor is not None:
            cursor.close()
        if con is not None:
            con.close()


def _read_good(good):
    """Return ``(name, price, comment_text, url)`` read from one ``gl-item`` element."""
    detail_url = good.find_element(By.TAG_NAME, 'a').get_attribute('href')
    p_name = good.find_element(By.CSS_SELECTOR, '.p-name em').text.replace('\n', '')
    price = good.find_element(By.CSS_SELECTOR, '.p-price i').text
    p_commit = good.find_element(By.CSS_SELECTOR, '.p-commit a').text
    return p_name, price, p_commit, detail_url
def connect():
    """Open and return a new PyMySQL connection using the fixed settings below."""
    settings = {
        'host': '127.0.0.1',   # server address
        'user': 'root',        # login user
        'password': 'root',    # login password
        'database': 'jd_db',   # schema to use
        'port': 3306,          # TCP port
        'charset': 'utf8',     # connection character set
    }
    return pymysql.connect(**settings)
def insert_or_update(sql, params=None):
    """Execute one write statement on a fresh connection and commit it.

    Args:
        sql: The SQL statement to execute. May contain ``%s`` placeholders
            when ``params`` is given.
        params: Optional sequence or mapping of values bound to the
            placeholders by the driver. Prefer this over building ``sql`` by
            string concatenation. ``None`` (the default) executes ``sql``
            exactly as passed, as before.

    Raises:
        Whatever ``connect()``, ``cursor.execute`` or ``commit`` raise; the
        cursor and connection are closed before the exception propagates.
    """
    con = connect()
    try:
        cursor = con.cursor()
        try:
            cursor.execute(sql, params)
        finally:
            cursor.close()
        # Only reached when execute succeeded; on failure the connection is
        # closed without a commit.
        con.commit()
    finally:
        # Previously skipped whenever execute/commit raised, leaking the
        # connection.
        con.close()
def spider(url, keyword):
    """Open ``url`` in Chrome, search for ``keyword`` and scrape the results.

    The keyword is typed into the element with id ``key`` and submitted with
    ENTER; the resulting pages are then handed to ``get_goods``.

    Args:
        url: Address of the page that contains the search box.
        keyword: Search text to submit.
    """
    driver = webdriver.Chrome()
    try:
        # Navigation is inside the try so a failed page load still releases
        # the browser.
        driver.get(url)
        driver.implicitly_wait(3)  # implicit wait for element lookups
        # find_element(By.ID, ...) replaces find_element_by_id, which was
        # removed in Selenium 4.
        input_tag = driver.find_element(By.ID, 'key')
        input_tag.send_keys(keyword)
        input_tag.send_keys(Keys.ENTER)
        get_goods(driver)
    finally:
        # quit() ends the whole WebDriver session; close() only closed the
        # current window and left the driver process running.
        driver.quit()
if __name__ == '__main__':
    # Script entry point: search the given site for the keyword below.
    spider(url='https://www.jd.com/', keyword='显卡3080')