weixin_43213530 2020-04-06 19:42 采纳率: 0%
浏览 254
已结题

python爬取某宝产生的问题

爬取当前页与第二页的数据相同

import requests
import re
import os

#获取商品页面
def requestUrl(url):
 try:
   # 设置userAgent
   headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Safari/537.36","Cookie":"xx"}
   response = requests.get(url, headers=headers,timeout=5)
   # 查看返回状态
   response.raise_for_status()
   response.encoding = response.apparent_encoding
   htmlText = response.text
   return htmlText
 except:
   print("error")
#查找关键词
def getTheText(text,ulist):
  if(len(ulist)!=0):
     ulist=[]
  jiaGeRepx=re.compile(r'\"view_price\"\:\"[\d\.]*\"')
  nameRepx=re.compile(r'\"raw_title\"\:\".*?\"')
  jiaGeList = jiaGeRepx.findall(text)
  nameList = nameRepx.findall(text)

  for i in range(len(nameList)):
      try:
          name = eval(nameList[i].split(":")[1])
          jiaGe = eval(jiaGeList[i].split(":")[1])
          ulist.append([name,jiaGe])
      except:
          name = nameList[i].split(":")[1]
          jiaGe = jiaGeList[i].split(":")[1]
          ulist.append([name, jiaGe])
count=0
##将结果输出到文件
def formatText(ulist,shenDu):
   jieGuo=""
   biaoDaShi = "{0:^9}\t{1:^50}\t{2:^9}\r\n"
   if (shenDu==0):
       xuHao = "序号"
       shangPin = "商品"
       jiaGe="价格"
       jieGuo = biaoDaShi.format(xuHao,shangPin,jiaGe)
   for tag in ulist :
       global count
       count+=1
       xuHao = str(count)
       shangPin = tag[0]
       jiaGe = str(tag[1])
       jieGuo += biaoDaShi.format(xuHao,shangPin,jiaGe)
   dstUrl = 'D://商品.txt'
   with open(dstUrl, 'a') as f:
    f.write(jieGuo)
    f.close


def main():
   uinfo = []
   shangPin = "书包"
   shenDu=2
   for i in range(shenDu):
        htmlText = requestUrl("https://s.某宝.com/search?q="+shangPin+"&s="+str(44*i))
        getTheText(htmlText,uinfo)
        formatText(uinfo,i)



if __name__ == '__main__':
    main()

url中的&s=44代表第二页

https://s.某宝.com/search?q=书包=3&ntoffset=3&p4ppushleft=1%2C48&s=44

结果
文件重复写入page1

  • 写回答

2条回答 默认 最新

  • 创帆云 2020-04-06 22:37
    关注

    不要想着爬某宝了 ,他们的反爬相当的牛比

    即使程序开发完了,爬几下就会弹出复杂的验证,就爬不下去了

    评论

报告相同问题?

悬赏问题

  • ¥15 交替优化波束形成和ris反射角使保密速率最大化
  • ¥15 树莓派与pix飞控通信
  • ¥15 自动转发微信群信息到另外一个微信群
  • ¥15 outlook无法配置成功
  • ¥30 这是哪个作者做的宝宝起名网站
  • ¥60 版本过低apk如何修改可以兼容新的安卓系统
  • ¥25 由IPR导致的DRIVER_POWER_STATE_FAILURE蓝屏
  • ¥50 有数据,怎么建立模型求影响全要素生产率的因素
  • ¥50 有数据,怎么用matlab求全要素生产率
  • ¥15 TI的insta-spin例程