3条回答 默认 最新
唯羽 2021-04-23 17:32 关注 应该是逻辑有点问题。4个list是在循环外累加的,每次zip处理的都是包含之前各页数据的累加结果,相当于 a = 3 * 第一页数量 + 2 * 第二页数量 + 1 * 第三页数量(猜测循环是按页数来的,别介意)。按我这个改法再试试?
def data(value):
    """Collect product fields from the search result pages and print them.

    The function scrolls the current page to the bottom, resets the
    module-level result lists and the counter ``a``, then runs two
    parse-and-navigate rounds.  Each round parses ``browser.page_source``,
    appends the image ``alt`` text, price, deal count and location text it
    finds to the corresponding list, and afterwards loads the search URL
    for the next result offset.  Once both rounds are done the four lists
    are zipped, every resulting tuple is printed and counted, and a summary
    line plus the price list are printed.

    Args:
        value: Search keyword; it is inserted into the ``q`` query
            parameter of the Taobao search URL.

    Returns:
        A 5-tuple ``(shop_name_list, shop_price_list, shop_people_list,
        shop_location_list, a)`` where ``a`` is the number of zipped rows
        that were printed.
    """
    # NOTE(review): the original snippet was collapsed onto a single line;
    # the nesting below is reconstructed from its syntax -- confirm against
    # the author's intent.
    global shop_name_list, shop_price_list, shop_people_list, shop_location_list, a

    # Run a JS snippet that scrolls to the bottom of the page.
    browser.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    sleep(2)

    # Start every call from empty results so repeated calls do not pile up.
    shop_name_list = []
    shop_price_list = []
    shop_people_list = []
    shop_location_list = []
    a = 0

    offset = 44
    for _ in range(1, 3):
        soup = BeautifulSoup(browser.page_source, 'lxml')
        grid = soup.find('div', class_='grid g-clearfix')
        containers = grid.find_all_next('div', class_='items')

        for container in containers:
            # Product name: taken from the alt text of the first <img>
            # inside each picture link.
            for pic_div in container.find_all('div', class_='pic'):
                links = pic_div.find_all('a', class_='pic-link J_ClickStat J_ItemPicA')
                for link in links:
                    shop_name_list.append(link.find_all('img')[0]['alt'])

            # Product price.
            shop_price_list.extend(
                node.text.strip()
                for node in container.find_all('div', class_='price g_price g_price-highlight')
            )

            # Number of buyers.
            shop_people_list.extend(
                node.text for node in container.find_all('div', class_='deal-cnt')
            )

            # Seller location.
            shop_location_list.extend(
                node.text for node in container.find_all('div', class_='location')
            )

        # Move on to the next result offset.
        # NOTE(review): the offset is bumped before use, so the first URL
        # requested has s=88, and the page loaded in the final round is
        # never parsed -- confirm this is intended.
        offset += 44
        browser.get(f"https://s.taobao.com/search?q={value}&s={offset}")
        sleep(0.5)

    # zip() stops at the shortest list, so only complete rows are reported.
    rows = zip(shop_name_list, shop_price_list, shop_people_list, shop_location_list)
    for row in rows:
        print(row)
        a += 1

    print('已成功爬取:%s条信息' % a)
    print(shop_price_list)
    return shop_name_list, shop_price_list, shop_people_list, shop_location_list, a
# Page note: this answer was marked as the accepted answer by the asker.