from time import sleep
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from bs4 import BeautifulSoup
def login_info():
    """Click through the login page and then open the site home link.

    Uses the module-level ``browser`` driver. The fixed pauses are plain
    sleeps; no element conditions are awaited.
    """
    # NOTE(review): presumably this icon switches the login form mode and the
    # 5 second pause leaves time for the user to finish logging in -- confirm.
    login_toggle = browser.find_element_by_xpath('//*[@id="login"]/div[1]/i')
    login_toggle.click()
    sleep(5)

    # Follow the link inside the "J_SiteNavHome" navigation element.
    browser.find_element_by_xpath('//*[@id="J_SiteNavHome"]/div/a').click()
    sleep(1)
def search_product(value):
    """Type ``value`` into the search box and click the search button.

    Args:
        value: Keyword text sent to the input element with id ``q``.

    Uses the module-level ``browser`` driver. Between typing and clicking,
    the page is scrolled to the bottom and the function pauses for 2 seconds.
    """
    browser.find_element_by_id('q').send_keys(value)

    browser.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    sleep(2)

    browser.find_element_by_css_selector('.btn-search').click()
def data(value):
    """Scrape and print listings from two consecutive search result pages.

    The page currently loaded in the module-level ``browser`` is parsed
    first (expected to be the first result page for ``value``), then the
    next result page is loaded and parsed. For every page the image ``alt``
    texts, prices, deal counts and locations are collected, combined with
    ``zip`` and printed as tuples. A summary line with the total number of
    printed records is printed last.

    Args:
        value: Search keyword, used to build the URL of the next result page.

    Returns:
        None. All results are written to stdout.

    Raises:
        IndexError, KeyError: If a product link has no ``img`` child or the
            image has no ``alt`` attribute.
    """
    from urllib.parse import quote

    page_count = 2
    # Step of the ``s`` query parameter between pages.
    # NOTE(review): presumably the number of listings per page -- confirm.
    page_step = 44
    # Percent-encode the keyword so characters such as '&' or '#' cannot
    # break the query string.
    keyword = quote(value, safe='')
    total = 0

    for page_index in range(page_count):
        soup = BeautifulSoup(browser.page_source, 'lxml')

        # ``find`` returns None when the result grid is absent (for example
        # on an unexpected page); treat that as a page without listings
        # instead of failing with AttributeError.
        grid = soup.find('div', class_='grid g-clearfix')
        containers = [] if grid is None else grid.find_all_next('div', class_='items')

        # Fresh lists per page: sharing them across pages would print the
        # earlier pages again and inflate the final count.
        names, prices, deal_counts, locations = [], [], [], []
        for container in containers:
            for pic in container.find_all('div', class_='pic'):
                for link in pic.find_all('a', class_='pic-link J_ClickStat J_ItemPicA'):
                    names.append(link.find_all('img')[0]['alt'])
            prices.extend(
                tag.text.strip()
                for tag in container.find_all('div', class_='price g_price g_price-highlight')
            )
            deal_counts.extend(
                tag.text for tag in container.find_all('div', class_='deal-cnt')
            )
            locations.extend(
                tag.text for tag in container.find_all('div', class_='location')
            )

        browser.execute_script('window.scrollTo(0,document.body.scrollHeight)')
        sleep(2)

        # NOTE(review): the four lists are gathered independently and ``zip``
        # stops at the shortest one, so a listing that lacks one field shifts
        # or drops later rows -- confirm against the page markup.
        for record in zip(names, prices, deal_counts, locations):
            print(record)
            total += 1

        # Load the following page only when another pass will parse it. The
        # offset is advanced before use; an offset of 0 would reload the
        # page that was just scraped.
        next_index = page_index + 1
        if next_index < page_count:
            browser.get(f"https://s.taobao.com/search?q={keyword}&s={next_index * page_step}")

    print('已成功爬取:%s条信息' % total)
if __name__ == '__main__':
    # Script entry point: open the login page, run the login steps, ask for
    # a keyword, run the search and print the scraped listings.
    # ``browser`` must stay a module-level name because login_info,
    # search_product and data all read it as a global.
    browser = webdriver.Chrome(executable_path='./chromedriver')
    try:
        browser.get('https://login.taobao.com/member/login.jhtml')
        browser.maximize_window()
        # NOTE(review): ``wait`` is not referenced in the visible code; kept
        # in case code outside this view relies on the global.
        wait = WebDriverWait(browser, 10)
        login_info()
        value = input("请输入你要查询的关键词:")
        search_product(value)
        data(value)
    finally:
        # Always shut down the browser and the driver process, including
        # when one of the steps above raises.
        browser.quit()