我将页面上的每个信息都单独爬取了出来,但感觉这样写浪费了很多时间,而且部分内容不知道该如何存入数据库,比如套餐信息、不同套餐的价格。不仅如此,写完之后总觉得代码怎么看怎么 low,如何才能让代码更简洁?
import tarfile
import mysql
import driver as driver
from selenium import webdriver
from bs4 import BeautifulSoup
import pymysql
# 1. Create the browser object (a Chrome session driven by the local chromedriver).
# The path is a raw string: a Windows path contains backslashes, and sequences
# such as "\P" or "\G" are invalid escapes in a normal string literal.
driver = webdriver.Chrome(executable_path=r"C:\Program Files\Google\Chrome\Application\chromedriver.exe")
# Load the product page that search_content() reads from.
driver.get('https://item.jd.com/100021007440.html')
# driver.maximize_window()
# sleep(5)
# Value stored in the first column of the inserted row.
_PRODUCT_URL = 'https://item.jd.com/100021007440.html'

# Locator of the price element.
_PRICE_XPATH = '/html/body/div[6]/div/div[2]/div[4]/div/div[1]/div[2]/span[1]/span[2]'

# The specification <ul> is addressed through two different prefixes; each
# field keeps the prefix it is listed with below.
# NOTE(review): both prefixes presumably resolve to the same list - confirm
# before collapsing them into one.
_SPEC_LIST_ABS = '/html/body/div[10]/div[2]/div[1]/div[2]/div[1]/div[1]/ul[3]'
_SPEC_LIST_BY_ID = '//*[@id="detail"]/div[2]/div[1]/div[1]/ul[3]'

# (label, list prefix, 1-based <li> position), in the order the values are
# inserted.  Labels are for readability only: the insert is positional.
# Mind the order of TopPixel (li[11]) and BackPixel (li[10]).
_SPEC_FIELDS = (
    ('sku_name', _SPEC_LIST_ABS, 1),
    ('sku_number', _SPEC_LIST_ABS, 2),
    ('sku_weight', _SPEC_LIST_BY_ID, 3),
    ('sku_from', _SPEC_LIST_BY_ID, 4),
    ('CPUModel', _SPEC_LIST_ABS, 5),
    ('RunningMemory', _SPEC_LIST_BY_ID, 6),
    ('MemorySize', _SPEC_LIST_ABS, 7),
    ('MemoryKar', _SPEC_LIST_ABS, 8),
    ('NumberofCameras', _SPEC_LIST_ABS, 9),
    ('TopPixel', _SPEC_LIST_ABS, 11),
    ('BackPixel', _SPEC_LIST_ABS, 10),
    ('ScreemSize', _SPEC_LIST_ABS, 12),
    ('ResolutionRatio', _SPEC_LIST_ABS, 13),
    ('ScreenRatio', _SPEC_LIST_ABS, 14),
    ('combo', _SPEC_LIST_ABS, 15),
    ('Charger', _SPEC_LIST_ABS, 16),
    ('HotSpot', _SPEC_LIST_ABS, 17),
    ('ScreenPercent', _SPEC_LIST_ABS, 18),
    ('function', _SPEC_LIST_ABS, 19),
    ('OperatingSystem', _SPEC_LIST_ABS, 20),
)


def _element_text(xpath):
    """Return the text of the single element that ``xpath`` locates."""
    # The (by, value) form of find_element is available in Selenium 3 and 4.
    return driver.find_element('xpath', xpath).text


def _run_insert(sql, params):
    """Execute ``sql`` with bound ``params`` on a fresh connection and commit."""
    con = connect()
    try:
        cursor = con.cursor()
        try:
            cursor.execute(sql, params)
            con.commit()
        finally:
            cursor.close()
    finally:
        con.close()


def search_content():
    """Scrape the loaded product page and insert one row into product_details.

    Reads the price and the twenty specification entries from the page held
    by the module-level ``driver`` and stores them, preceded by the product
    URL, as one positional row of the ``product_details`` table.

    The scraped text is handed to the database driver as bound parameters
    rather than concatenated into the statement, so quotes or other special
    characters in the page text cannot break or alter the SQL.

    Errors from Selenium (e.g. an element that cannot be located) and from
    the database driver propagate to the caller.
    """
    values = [_PRODUCT_URL, _element_text(_PRICE_XPATH)]
    values.extend(
        _element_text('{}/li[{}]'.format(prefix, position))
        for _label, prefix, position in _SPEC_FIELDS
    )

    # One %s placeholder per value; the driver quotes and escapes each one.
    placeholders = ','.join(['%s'] * len(values))
    sql = 'insert into product_details()value(' + placeholders + ')'
    _run_insert(sql, values)
def connect():
    """Open and return a new PyMySQL connection to the ``jd_db`` database."""
    settings = {
        'host': '127.0.0.1',   # server address
        'user': 'root',        # account name
        'password': 'root',    # account password
        'database': 'jd_db',   # database name
        'port': 3306,          # server port
        'charset': 'utf8',     # connection character set
    }
    return pymysql.connect(**settings)
def insert_or_update(sql, params=None):
    """Execute one write statement on a fresh connection and commit it.

    Args:
        sql: The statement to execute.
        params: Optional values for the ``%s`` placeholders in ``sql``.
            They are forwarded to ``cursor.execute`` so the driver does the
            quoting.  When omitted, ``sql`` is executed exactly as given.

    Any exception raised while executing or committing propagates to the
    caller; the cursor and the connection are closed in every case.
    """
    con = connect()  # open a dedicated connection for this statement
    try:
        cursor = con.cursor()
        try:
            cursor.execute(sql, params)
            con.commit()  # make the change permanent
        finally:
            cursor.close()
    finally:
        # Closing without a commit discards the failed statement's work.
        con.close()
# for introduction in text1_list:
# print(introduction.text)
# intro = introduction.text
if __name__ == '__main__':
    try:
        search_content()
    finally:
        # End the browser session even when scraping or the insert fails, so
        # no chromedriver/Chrome process is left running.
        driver.quit()