# -*- coding:utf-8 -*-
from time import sleep
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from bs4 import BeautifulSoup
import pymysql
import lxml
def login_info():
    """Log in to Taobao by switching the login page to QR-code mode."""
    # Switch the login widget to its QR-code panel, then pause so the
    # user has time to scan the code with the phone app.
    browser.find_element_by_xpath('//*[@id="login"]/div[1]/i').click()
    sleep(5)
    # Once logged in, follow the site-nav link back to the Taobao home page.
    home_link = browser.find_element_by_xpath('//*[@id="J_SiteNavHome"]/div/a')
    home_link.click()
    sleep(1)
def search_product(value):
    """Type *value* into the Taobao search box and submit the search."""
    # Locate the search input by its id and fill in the keyword.
    box = browser.find_element_by_id('q')
    box.send_keys(value)
    # Scroll to the bottom of the page so lazily-loaded widgets initialise.
    browser.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    sleep(2)
    # Fire the search.
    browser.find_element_by_css_selector('.btn-search').click()
def data(value):
    """Scrape two pages of search results for *value*.

    Collects product name, price, buyer count and seller location into the
    module-level lists (and item counter ``a``) that ``mysql()`` later reads,
    and returns them as a tuple.
    """
    # Scroll down so the whole result grid is rendered before parsing.
    browser.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    sleep(2)
    global shop_name_list, shop_price_list, shop_people_list, shop_location_list, a
    shop_name_list = []
    shop_price_list = []
    shop_people_list = []
    shop_location_list = []
    a = 0   # total number of items collected across all pages
    b = 44  # Taobao paginates via the `s` query param in steps of 44
    for _page_no in range(1, 3):
        page_start = len(shop_name_list)  # first index belonging to this page
        soup = BeautifulSoup(browser.page_source, 'lxml')
        item_blocks = soup.find('div', class_='grid g-clearfix').find_all_next('div', class_='items')
        for block in item_blocks:
            # Product name: taken from the thumbnail image's alt text.
            for pic in block.find_all('div', class_='pic'):
                for link in pic.find_all('a', class_='pic-link J_ClickStat J_ItemPicA'):
                    shop_name_list.append(link.find_all('img')[0]['alt'])
            # Price.
            for price in block.find_all('div', class_='price g_price g_price-highlight'):
                shop_price_list.append(price.text.strip())
            # Number of buyers.
            for people in block.find_all('div', class_='deal-cnt'):
                shop_people_list.append(people.text)
            # Seller location.
            for location in block.find_all('div', class_='location'):
                shop_location_list.append(location.text)
        # BUG FIX: only print and count the items added on THIS page.  The
        # original zipped the cumulative lists on every iteration, printing
        # page-1 rows twice and inflating `a`, which mysql() later uses as
        # the row count (causing an IndexError there).
        new_rows = zip(shop_name_list[page_start:], shop_price_list[page_start:],
                       shop_people_list[page_start:], shop_location_list[page_start:])
        for row in new_rows:
            print(row)
            a += 1
        # BUG FIX: navigate with the current offset *before* advancing it.
        # The original incremented first, jumping from s=0 straight to s=88
        # and skipping result page s=44 entirely.
        browser.get(f"https://s.taobao.com/search?q={value}&s={b}")
        b += 44
        sleep(0.5)
    print('已成功爬取:%s条信息' % a)
    return shop_name_list, shop_price_list, shop_people_list, shop_location_list, a
def mysql():
    """Persist the scraped rows into MySQL table ``dbtest.taobao``.

    Reads the module-level lists populated by ``data()`` and the item
    count ``a``; drops and recreates the table on every run.
    """
    db = pymysql.connect(
        host='localhost',
        user='root',
        password='123456',
        database='dbtest',
        port=3306,
        charset='utf8',
        cursorclass=pymysql.cursors.DictCursor,
    )
    try:
        with db.cursor() as cursor:
            # Recreate the table from scratch so reruns don't duplicate rows.
            cursor.execute("drop table if exists taobao")
            cursor.execute(
                "create table taobao ("
                "shop_name char(100) not null,"
                "price char(30),"
                "people_number char(30),"
                "location char(50))"
            )
            # SECURITY/CORRECTNESS FIX: parameterised insert instead of
            # f-string interpolation, which broke (and was SQL-injectable)
            # whenever a product name contained a quote character.
            sql = ("insert into taobao(shop_name,price,people_number,location) "
                   "values(%s,%s,%s,%s)")
            rows = [
                (shop_name_list[i], shop_price_list[i],
                 shop_people_list[i], shop_location_list[i])
                for i in range(a)
            ]
            cursor.executemany(sql, rows)
        # One commit for the whole batch (the original committed per row).
        db.commit()
    finally:
        # Always release the connection, even if an insert fails.
        db.close()
if __name__ == '__main__':
    value = input("请输入你要查询的关键词:")
    browser = webdriver.Chrome(executable_path='./chromedriver')
    # FIX: browser.get() returns None — the original bound it to an unused
    # variable (taobao_index); just call it for its side effect.
    browser.get('https://login.taobao.com/member/login.jhtml')
    # Maximise so page elements are laid out as the XPaths expect.
    browser.maximize_window()
    # NOTE(review): `wait` is never used below — kept so existing behavior
    # (constructing the WebDriverWait) is unchanged; consider removing.
    wait = WebDriverWait(browser, 10)
    login_info()
    search_product(value)
    data(value)
请问大佬们在这个selenium代码中如何添加代理iP(小白的我在颤抖)
- 写回答
- 好问题 0 提建议
- 追加酬金
- 关注问题
- 邀请回答
-
3条回答 默认 最新
- 多鱼的夏天 2021-04-21 14:21关注
109行改为:
PROXY = "xxxx:port"  # 这里写你的代理
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--proxy-server=%s' % PROXY)
browser = webdriver.Chrome(executable_path='./chromedriver', options=chrome_options)
本回答被题主选为最佳回答 , 对您是否有帮助呢?解决 无用评论 打赏 举报
悬赏问题
- ¥15 Stata 面板数据模型选择
- ¥20 idea运行测试代码报错问题
- ¥15 网络监控:网络故障告警通知
- ¥15 django项目运行报编码错误
- ¥15 请问这个是什么意思?
- ¥15 STM32驱动继电器
- ¥15 Windows server update services
- ¥15 关于#c语言#的问题:我现在在做一个墨水屏设计,2.9英寸的小屏怎么换4.2英寸大屏
- ¥15 模糊pid与pid仿真结果几乎一样
- ¥15 java的GUI的运用