以沐、 2016-12-06 12:20 采纳率: 0%
浏览 2483

python爬虫出错 各位大神能不能帮我看一下是什么问题?python2.7

import urllib2
import urllib
import re

class BDTB:
def init(self,baseUrl,see_LZ):
self.baseURL = baseUrl
self.seeLZ = '?see_lz='+str(see_LZ)

def getPage(self,pageNum):
    try:
        url = self.baseURL + self.seeLZ + '&pn=' + str(pageNum)
        request = urllib2.Request(url)
        response  =urllib2.urlopen(request)
        return response
    except urllib2.URLError , e:
        if hasattr(e,"reason"):
            print u"link fail,reason",e.reason
            return None
def getTitle(self):
    page = self.getPage(1)
    pattern = re.compile('<h3 class="core_title_txt.*?>(.*?)</h3>',re.S)
    result = re.search(pattern,page)
    if result:
        print result.group(1)
        return result.group(1).strip()
    else:
        return None

def getPageNum(self):
    page = self.getPage(1)
    print page.read()
    pattern = re.compile('<li class="l_reply_num.*?</span>.*?<span.*?>(.*?)</span>', re.S)
    result = re.search(pattern, page)
    if result:
        print result.group(1)
        return result.group(1).strip()
    else:
        return None

def getContent(self):
    page = self.getPage(1)
    pattern = re.complie('<div id="post_content_.*?>(.*?)</div>',re.S)
    items = re.findall(pattern,page)
    for item in items:
        print item

baseURL = "http://tieba.baidu.com/p/4866982459"
bdtb = BDTB(baseURL,1)
#bdtb.getPage(1)
#bdtb.getTitle()
#bdtb.getPageNum()
bdtb.getContent()

运行getTitle()的错误:
Traceback (most recent call last):
File "F:\python学习\程序代码\爬虫\123.py", line 51, in
bdtb.getTitle()
File "F:\python学习\程序代码\爬虫\123.py", line 23, in getTitle
result = re.search(pattern,page)
File "C:\Python27\lib\re.py", line 146, in search
return _compile(pattern, flags).search(string)
TypeError: expected string or buffer

运行getPageNum()的错误:
Traceback (most recent call last):
File "F:\python学习\程序代码\爬虫\123.py", line 52, in
bdtb.getPageNum()
File "F:\python学习\程序代码\爬虫\123.py", line 34, in getPageNum
result = re.search(pattern, page)
File "C:\Python27\lib\re.py", line 146, in search
return _compile(pattern, flags).search(string)
TypeError: expected string or buffer

运行getContent()时候发生的错误:
Traceback (most recent call last):
File "F:\python学习\程序代码\爬虫\123.py", line 53, in
bdtb.getContent()
File "F:\python学习\程序代码\爬虫\123.py", line 43, in getContent
pattern = re.complie('

  • 写回答

3条回答 默认 最新

  • oyljerry 2016-12-06 13:07
    关注

    获取的字符串编码格式问题

    评论

报告相同问题?

悬赏问题

  • ¥15 #MATLAB仿真#车辆换道路径规划
  • ¥15 java 操作 elasticsearch 8.1 实现 索引的重建
  • ¥15 数据可视化Python
  • ¥15 要给毕业设计添加扫码登录的功能!!有偿
  • ¥15 kafka 分区副本增加会导致消息丢失或者不可用吗?
  • ¥15 微信公众号自制会员卡没有收款渠道啊
  • ¥100 Jenkins自动化部署—悬赏100元
  • ¥15 关于#python#的问题:求帮写python代码
  • ¥20 MATLAB画图图形出现上下震荡的线条
  • ¥15 关于#windows#的问题:怎么用WIN 11系统的电脑 克隆WIN NT3.51-4.0系统的硬盘