from lxml import etree
import requests
import xlwt
import xlrd
class qunawang(object):
    """Scrape Nanjing attraction listings from piao.qunar.com into an Excel sheet.

    The workbook (with a header row) is created and saved in ``__init__``;
    each call to ``spiderPage`` appends the attractions of one result page.
    """

    # xlwt writes legacy BIFF (.xls) content; the .xlsx extension is therefore
    # misleading but kept for backward compatibility -- xlrd 1.x sniffs the
    # real format from the file content, so open_workbook still succeeds.
    # TODO(review): consider renaming to viewspot.xls (or switching to openpyxl).
    SAVE_PATH = 'F:/information/viewspot.xlsx'

    def __init__(self):
        """Create the workbook, write the header row, and save it to disk."""
        self.f = xlwt.Workbook()  # create the workbook
        # name the sheet; cell_overwrite_ok allows rewriting a cell
        self.sheet1 = self.f.add_sheet(u'景点信息', cell_overwrite_ok=True)
        # header row: id, name, introduction, price, address, url
        self.rowsTitle = [u'编号', u'景点名', u'景点介绍', u'景点价格', u'景点地址', u'景点网址']
        for i in range(len(self.rowsTitle)):
            # the last argument supplies the cell style
            self.sheet1.write(0, i, self.rowsTitle[i], self.set_style('Times new Roman', 220, True))
        # Excel save location
        self.f.save(self.SAVE_PATH)

    def set_style(self, name, height, bold=False):
        """Return an xlwt cell style with the given font name, height and bold flag."""
        style = xlwt.XFStyle()  # initialize the style
        font = xlwt.Font()      # create a font for the style
        font.name = name
        font.bold = bold
        font.colour_index = 2   # xlwt palette index 2 = red
        font.height = height
        style.font = font
        return style

    def getUrl(self, start_page=2, end_page=2):
        """Fetch every result page numbered start_page..end_page (inclusive).

        The defaults reproduce the original single-page behaviour (page 2).
        Call e.g. ``getUrl(1, 10)`` to crawl pages 1 through 10.

        NOTE: the original URL contained ``®ion=`` -- a mojibake of
        ``&region=`` (the ``&reg`` run was collapsed into the (R) entity);
        fixed here so the region parameter reaches the server intact.
        """
        # keyword=%E5%8D%97%E4%BA%AC is the URL-encoded form of "南京" (Nanjing)
        url_template = ('http://piao.qunar.com/ticket/list.htm?'
                        'keyword=%E5%8D%97%E4%BA%AC&region=&from=mpl_search_suggest&page={}')
        for page in range(start_page, end_page + 1):
            self.spiderPage(url_template.format(page))

    def spiderPage(self, url):
        """Download one listing page, parse each attraction, append rows to the sheet.

        Rows are appended after whatever rows the saved workbook already holds,
        so repeated calls accumulate results. The workbook is saved in
        ``finally`` so partial results survive a network/parse failure.
        """
        if url is None:
            return None
        try:
            # Re-open the saved workbook only to learn how many rows exist,
            # so new rows are appended after them.
            book = xlrd.open_workbook(self.SAVE_PATH)
            rowCount = book.sheets()[0].nrows  # number of rows already written
            user_agent = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                          'AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/72.0.3626.119 Safari/537.36')
            respon = requests.get(url, headers={'User-Agent': user_agent})
            s = etree.HTML(respon.text)
            # one <div> per attraction inside the search-result list
            trs = s.xpath('//*[@id="search-list"]/div')
            m = 0  # rows written during this call
            for tr in trs:
                title = tr.xpath('./div/div[2]/h3/a/text()')
                location = tr.xpath('./div/div[2]/div/p/span/text()')
                introduce = tr.xpath('./div/div[2]/div/div[2]/text()')
                price = tr.xpath('./div/div[3]/table/tr[1]/td/span/em/text()')
                website = tr.xpath('./div/div[2]/h3/a/@href')
                # xpath() returns a list; take the first hit or fall back to ''
                title = title[0] if title else ''
                location = location[0] if location else ''
                introduce = introduce[0] if introduce else ''
                price = price[0] if price else ''
                website = website[0] if website else ''
                # column order must match the header row written in __init__
                row = [rowCount + m, title, introduce, price, location, website]
                for col, value in enumerate(row):
                    self.sheet1.write(rowCount + m, col, value)
                m += 1
                print(m)
                print(title, introduce, price, location, website)
        finally:
            # persist whatever was written, even if the request/parse failed
            self.f.save(self.SAVE_PATH)
# The original guard was `if '_main_':` -- a non-empty string literal is always
# truthy, so the scraper also ran whenever this module was merely imported.
# Use the standard entry-point guard instead.
if __name__ == '__main__':
    qn = qunawang()
    # Defaults crawl only page 2 (original behaviour); pass e.g. qn.getUrl(1, 5)
    # to page through results.
    qn.getUrl()
# NOTE (author's original question, translated): "I'm new to web scraping and
# adapted some code found online; it scrapes Nanjing attractions from Qunar.
# How do I add support for paging through the results?"