import requests
from lxml import etree
import re
def parser(url='http://www.douban.com/tag/%E5%B0%8F%E8%AF%B4/?focus=book',
           output_path='2.txt', timeout=10):
    """Scrape book titles and author text from a Douban tag page into a file.

    Fetches ``url``, selects every ``<dl>`` under the
    ``div.mod-list.book-list`` container, and for each one takes the first
    text node of ``./dd/a`` (title) and of ``./dd/div`` (author line). Each
    pair is echoed to stdout and written to ``output_path`` as one line.

    Args:
        url: Page to scrape. Defaults to the Douban "novel" tag page.
        output_path: File to (over)write with the results, UTF-8 encoded.
        timeout: Seconds to wait for the HTTP request before giving up.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-2xx HTTP status (raised by ``raise_for_status``).
    """
    headers = {'User-Agent': 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT)'}

    # A timeout prevents the script from hanging forever on a stalled
    # connection; raise_for_status stops us from parsing an error page and
    # silently producing an empty output file.
    response = requests.get(url=url, headers=headers, timeout=timeout)
    response.raise_for_status()

    tree = etree.HTML(response.text)
    # Defensive: treat an unparseable/empty document as "no books found".
    if tree is None:
        book_list = []
    else:
        book_list = tree.xpath('//div[@class="mod-list book-list"]/dl')

    # Write-only access is all that is needed, so 'w' rather than 'w+'.
    with open(output_path, 'w', encoding='utf-8') as f:
        for dl in book_list:
            names = dl.xpath('./dd/a/text()')
            if not names:
                # An entry without a title is unusable; skip it instead of
                # crashing with IndexError.
                continue
            authors = dl.xpath('./dd/div/text()')

            book_name = names[0]
            # A missing author line should not drop the book entirely.
            book_author = authors[0] if authors else ''

            print(book_name, book_author)
            # One record per line; previously all records ran together.
            f.write(book_name + book_author + '\n')
# Script entry point: run the scraper only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    parser()