孔网里的书摊即将关闭,想获取自己在孔夫子旧书网中的卖家上书信息(包括书名、分类、品相、定价),我的初步代码如下,但是未能顺利爬取,在jupyter中也没报错,请帮我把代码中的问题找出来,谢谢~
import requests
from lxml.html import etree
import time
import csv
headers={
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36'
}
def kongfz_booksinfo(url):
res = requests.get(url, headers=headers)
selector = etree.HTML(res.text)
contents = selector.xpath('//html/body/div[6]/div[2]/div[2]/div[3]/div[1]') #循环点
def contents():
for content in contents:
title = content.xpath('//*[@id="unsold_list_table"]/tbody/tr[1]/td[1]/div/div[3]/abs/text()')[0] #书名
bookclass = content.xpath('//*[@id="unsold_list_table"]/tbody/tr[1]/td[2]/div/div[1]/p/text()')[0] #分类
newly = content.xpath('//*[@id="unsold_list_table"]/tbody/tr[1]/td[3]/div[1]/div[1]/text()')[0] #品相
price = content.xpath('//*[@id="unsold_list_table"]/tbody/tr[1]/td[4]/div[1]/div[1]/text()')[0] #定价
writer = csv.writer(fp)
writer.writerow((title,bookclass, newly, price))
if __name__=='__main__':
urls = ['https://seller.kongfz.com/shop/item.html#unsold_244'.format(str(i))for i in range(0,100)] #写csv首行
with open('C://Users/Pekon/Desktop/kongfz.csv', 'a+', newline='', encoding='utf-8')as f:
writer = csv.writer(f)
writer.writerow(('title', 'bookclass', 'newly', 'price'))
for url in urls:
kongfz_booksinfo(url)