qq_43201710
AinD
2019-02-26 20:16

requests+xpath爬虫关于url翻页问题

  • python
from lxml import etree
import requests
import xlwt
import xlrd

class qunawang(object):
    """Scrape Qunar ticket-search results for Nanjing into an Excel sheet.

    The workbook is created (header row only) when the object is built and is
    re-saved after every page that ``spiderPage`` processes.

    NOTE: xlwt writes the legacy ``.xls`` format, so the ``.xlsx`` extension of
    the default path is misleading. It is kept unchanged for backward
    compatibility; pass ``path`` to choose a different file.
    """

    # Search-result URL for the keyword "Nanjing", without the page number.
    BASE_URL = 'http://piao.qunar.com/ticket/list.htm?keyword=%E5%8D%97%E4%BA%AC&region=&from=mpl_search_suggest&page='
    # Where the workbook is stored unless the caller passes another path.
    DEFAULT_PATH = 'F:/information/viewspot.xlsx'
    USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36'
    # Seconds to wait for the server; without it requests.get can block forever.
    REQUEST_TIMEOUT = 10

    def __init__(self, path=DEFAULT_PATH):
        """Create the workbook, write the header row and save it to ``path``.

        Args:
            path: File the workbook is saved to and re-read from.
        """
        self.path = path
        self.f = xlwt.Workbook()  # create the workbook
        self.sheet1 = self.f.add_sheet(u'景点信息', cell_overwrite_ok=True)  # name the sheet
        self.rowsTitle = [u'编号',u'景点名', u'景点介绍', u'景点价格', u'景点地址', u'景点网址']  # column headings
        # Build the header style once and share it across all header cells.
        header_style = self.set_style('Times new Roman', 220, True)
        for col, heading in enumerate(self.rowsTitle):
            self.sheet1.write(0, col, heading, header_style)
        self.f.save(self.path)

    def set_style(self, name, height, bold=False):
        """Return an ``xlwt.XFStyle`` using the given font name, height and weight.

        The font colour is always palette index 2.
        """
        style = xlwt.XFStyle()
        font = xlwt.Font()
        font.name = name
        font.bold = bold
        font.colour_index = 2
        font.height = height
        style.font = font
        return style

    @classmethod
    def page_url(cls, page):
        """Return the search-result URL for the given 1-based page number."""
        return cls.BASE_URL + str(int(page))

    @staticmethod
    def _first_or_blank(matches):
        """Return the first XPath match, or '' when nothing matched."""
        return matches[0] if matches else ''

    @classmethod
    def _parse_item(cls, node):
        """Extract (title, introduce, price, location, website) from one result node."""
        first = cls._first_or_blank
        title = first(node.xpath('./div/div[2]/h3/a/text()'))
        location = first(node.xpath('./div/div[2]/div/p/span/text()'))
        introduce = first(node.xpath('./div/div[2]/div/div[2]/text()'))
        price = first(node.xpath('./div/div[3]/table/tr[1]/td/span/em/text()'))
        website = first(node.xpath('./div/div[2]/h3/a/@href'))
        return title, introduce, price, location, website

    def getUrl(self, start_page=2, end_page=None):
        """Crawl every result page from ``start_page`` to ``end_page`` inclusive.

        With no arguments only page 2 is fetched, which matches the original
        hard-coded behaviour. Call e.g. ``getUrl(1, 10)`` to walk pages 1-10.

        Args:
            start_page: First page to fetch (1-based).
            end_page: Last page to fetch; defaults to ``start_page``.

        Raises:
            ValueError: If ``start_page`` is smaller than 1.
        """
        if start_page < 1:
            raise ValueError('start_page must be >= 1')
        if end_page is None:
            end_page = start_page
        for page in range(start_page, end_page + 1):
            self.spiderPage(self.page_url(page))

    def spiderPage(self, url):
        """Fetch one result page and append its entries to the sheet.

        Rows are appended after the rows already present in the saved file.
        The workbook is saved again even when fetching or parsing fails.

        Args:
            url: Page to download; ``None`` makes the call a no-op.

        Returns:
            None.

        Raises:
            requests.RequestException: On connection failure, timeout or a
                non-success HTTP status.
        """
        if url is None:
            return None

        try:
            # The saved file tells us where the next free row is.
            saved_book = xlrd.open_workbook(self.path)
            first_row = saved_book.sheets()[0].nrows

            response = requests.get(
                url,
                headers={'User-Agent': self.USER_AGENT},
                timeout=self.REQUEST_TIMEOUT,
            )
            # Fail loudly instead of parsing an error page into zero rows.
            response.raise_for_status()

            tree = etree.HTML(response.text)
            items = tree.xpath('//*[@id="search-list"]/div')
            for offset, item in enumerate(items):
                title, introduce, price, location, website = self._parse_item(item)
                row_index = first_row + offset
                # The first column holds the running number, which equals the row index.
                record = [row_index, title, introduce, price, location, website]
                for col, value in enumerate(record):
                    self.sheet1.write(row_index, col, value)

                print(offset + 1)
                print(title, introduce, price, location, website)

        finally:
            self.f.save(self.path)

# Start the crawl only when the file is run as a script, not when imported.
if __name__ == '__main__':
    qn = qunawang()
    qn.getUrl()

刚刚接触爬虫,参考了网上的一些代码,爬取的是去哪儿网南京的景点。我想加入可以翻页的功能,该如何添加?

  • 点赞
  • 回答
  • 收藏
  • 复制链接分享

0条回答

为你推荐

换一换