weixin_43213530 2020-04-06 19:42 采纳率: 0%
浏览 254
已结题

python爬取某宝产生的问题

爬取当前页与第二页的数据相同

import requests
import re
import os

#获取商品页面
def requestUrl(url):
 try:
   # 设置userAgent
   headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Safari/537.36","Cookie":"xx"}
   response = requests.get(url, headers=headers,timeout=5)
   # 查看返回状态
   response.raise_for_status()
   response.encoding = response.apparent_encoding
   htmlText = response.text
   return htmlText
 except:
   print("error")
#查找关键词
def getTheText(text,ulist):
  if(len(ulist)!=0):
     ulist=[]
  jiaGeRepx=re.compile(r'\"view_price\"\:\"[\d\.]*\"')
  nameRepx=re.compile(r'\"raw_title\"\:\".*?\"')
  jiaGeList = jiaGeRepx.findall(text)
  nameList = nameRepx.findall(text)

  for i in range(len(nameList)):
      try:
          name = eval(nameList[i].split(":")[1])
          jiaGe = eval(jiaGeList[i].split(":")[1])
          ulist.append([name,jiaGe])
      except:
          name = nameList[i].split(":")[1]
          jiaGe = jiaGeList[i].split(":")[1]
          ulist.append([name, jiaGe])
count=0
##将结果输出到文件
def formatText(ulist,shenDu):
   jieGuo=""
   biaoDaShi = "{0:^9}\t{1:^50}\t{2:^9}\r\n"
   if (shenDu==0):
       xuHao = "序号"
       shangPin = "商品"
       jiaGe="价格"
       jieGuo = biaoDaShi.format(xuHao,shangPin,jiaGe)
   for tag in ulist :
       global count
       count+=1
       xuHao = str(count)
       shangPin = tag[0]
       jiaGe = str(tag[1])
       jieGuo += biaoDaShi.format(xuHao,shangPin,jiaGe)
   dstUrl = 'D://商品.txt'
   with open(dstUrl, 'a') as f:
    f.write(jieGuo)
    f.close


def main():
   uinfo = []
   shangPin = "书包"
   shenDu=2
   for i in range(shenDu):
        htmlText = requestUrl("https://s.某宝.com/search?q="+shangPin+"&s="+str(44*i))
        getTheText(htmlText,uinfo)
        formatText(uinfo,i)



if __name__ == '__main__':
    main()

url中的&s=44代表第二页

https://s.某宝.com/search?q=书包=3&ntoffset=3&p4ppushleft=1%2C48&s=44

结果
文件重复写入page1

  • 写回答

2条回答 默认 最新

  • 创帆云 2020-04-06 22:37
    关注

    不要想着爬某宝了 ,他们的反爬相当的牛比

    即使程序开发完了,爬几下就会弹出复杂的验证,就爬不下去了

    评论

报告相同问题?

悬赏问题

  • ¥50 树莓派安卓APK系统签名
  • ¥15 maple软件,用solve求反函数出现rootof,怎么办?
  • ¥65 汇编语言除法溢出问题
  • ¥15 Visual Studio问题
  • ¥20 求一个html代码,有偿
  • ¥100 关于使用MATLAB中copularnd函数的问题
  • ¥20 在虚拟机的pycharm上
  • ¥15 jupyterthemes 设置完毕后没有效果
  • ¥15 matlab图像高斯低通滤波
  • ¥15 针对曲面部件的制孔路径规划,大家有什么思路吗