#-*_coding:utf8-*-
import requests
import re
import sys
from lxml import etree
reload(sys)
sys.setdefaultencoding("utf-8")
j = 0
all_links = []
for i in range(2):
link = re.sub('start=\d', 'start=%d' % j, url, re.S)
all_links.append(link)
j += 15
all_htmls = []
for i in range(2):
html = requests.get(all_links[i]).text
all_htmls.append(html)
all_items = []
for i in range(2):
selector = etree.HTML(all_htmls[i])
item_group = selector.xpath('//div[@class="grid-view"]/div[@class="item"]/div[@class="info"]/ul/li[@class="title"]/a/@href')
for j in range(len(item_group)):
all_items.append(item_group[j])
all_infos = []
for i in range(len(all_items)):
info = {}
titlelink = all_items[i]
titlehtml = requests.get(titlelink).text
selector = etree.HTML(titlehtml)
info['title'] = selector.xpath('//span[@property="v:itemreviewed"]/text()')
info['date'] = selector.xpath('//span[@class="year"]/text()')
info['rating'] = selector.xpath('//strong[@class="ll rating_num"]/text()')
all_infos.append(info)
f = open('info.txt', 'a')
j = 1
for each in all_infos:
f.writelines('No.:' + str(j) + '\n')
f.writelines('Title:' + each['title'] + '\n')
f.writelines('Date:' + each['date'] + '\n')
f.writelines('Rating:' + each['rating'] + '\n\n')
j += 1
f.close()
print j
这段代码运行后会报错,错误在
f.writelines('Title:' + each['title'] + '\n')
f.writelines('Date:' + each['date'] + '\n')
f.writelines('Rating:' + each['rating'] + '\n\n')
这几行,提示:
Traceback (most recent call last):
File "D:/D/PycharmProjects/untitled/456.py", line 46, in
f.writelines('Title:' + each['title'] + '\n')
TypeError: cannot concatenate 'str' and 'list' objects
百思不得其解,新手,正在试着自学入门python,请留情,请指教,多谢多谢!
另外:
我如果改成
str( each['title'])
str( each['date'])
str( each['rating'])
的话,程序可以运行,但是结果会是这样子:
No.:1
Title:[u'\u5144\u5f1f Br\xf8dre']
Date:['(2015)']
Rating:['8.9']
No.:2
Title:[u'\u6c42\u804c\u8bb0 Get a Job']
Date:['(2016)']
Rating:[]
No.:3
Title:[u'\u5931\u5e38 Anomalisa']
Date:['(2015)']
Rating:['7.4']
No.:4
Title:[u'\u8759\u8760\u4fa0\u5927\u6218\u8d85\u4eba\uff1a\u6b63\u4e49\u9ece\u660e Batman v Superman: Dawn of Justice']
Date:['(2016)']
Rating:['6.7']
No.:5
Title:[u'\u7231\u60c5\u91cd\u51fb Love Punch']
Date:['(2013)']
Rating:['6.4']
No.:6
Title:[u'\u98de\u8dc3\u60c5\u6d77 Beyond the Sea']
Date:['(2004)']
Rating:['7.6']
No.:7
Title:[u"\u8ff7\u59311971 '71"]
Date:['(2014)']
Rating:['7.1']
No.:8
Title:[u'\u6b7b\u4f8d Deadpool']
Date:['(2016)']
Rating:['7.6']
No.:9
Title:[u'\u7b2c\u4e94\u6ce2 The 5th Wave']
Date:['(2016)']
等等....
可我想要的结果是
No.:1
Title:Brødre
Date:(2015)
Rating:8.9
为什么呢?