weixin_40718782 2020-11-08 20:32 采纳率: 0%
浏览 202

python爬取天眼查详情页,使用bs4解析页面,解析不对,我该怎么办?

import requests
import pandas as pd
import lxml
import sys
from bs4 import BeautifulSoup
import time
import urllib
import cx_Oracle
import random
def get_company_info(url):
    """Download a Tianyancha page and parse it with BeautifulSoup.

    The request imitates a desktop Chrome browser (headers plus a logged-in
    session cookie). The parsed document is printed and also returned so that
    callers can extract fields from it.

    Args:
        url: Absolute URL of the page to fetch, e.g. a company detail page.
            It is also sent as the ``Referer`` header.

    Returns:
        The ``BeautifulSoup`` document built from the response body with the
        ``lxml`` parser.

    Raises:
        requests.RequestException: On connection problems, on timeout, or
            (via ``raise_for_status``) when the server answers with a 4xx/5xx
            status.
    """
    headers = {
        'Host': 'www.tianyancha.com',
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': r'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Referer': url,
        # Do NOT advertise "br": requests only decodes Brotli bodies when a
        # Brotli package is installed. Without it the body stays compressed
        # and the parsed "page" is unreadable garbage.
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        # NOTE(review): hard-coded, account-bound session cookie. It will
        # expire and should be loaded from configuration or the environment
        # instead of living in source. The value is kept unchanged here; it
        # is written as two adjacent raw-string literals, which Python
        # concatenates into a single header value.
        'Cookie': (
            r'TYCID=63830870fbcd11e98ce4453b01823d7c; undefined=63830870fbcd11e98ce4453b01823d7c; ssuid=5981952400; _ga=GA1.2.1810217122.1572519518; jsid=SEM-BAIDU-PZ0703-VIP-000001; tyc-user-phone=%255B%252215519540825%2522%255D; RTYCID=ffb3c7300dae48ee84b62236fcb07b5a; CT_TYCID=4892acecf4a5457aad309f1ffbfaea83; aliyungf_tc=AQAAACrF4T6wIwcA1xZ4atN0uO+itPnS; csrfToken=xMOjh1bOrZyXDmgm8xlm8v5B; bannerFlag=false; _gid=GA1.2.927901130.1596874364; Hm_lvt_e92c8d65d92d534b0fc290df538b4758=1596874364,1596875281,1596875446,1596875842; tyc-user-info=%257B%2522claimEditPoint%2522%253A%25220%2522%252C%2522vipToMonth%2522%253A%2522false%2522%252C%2522explainPoint%2522%253A%25220%2522%252C%2522personalClaimType%2522%253A%2522none%2522%252C%2522integrity%2522%253A%252210%2525%2522%252C%2522state%2522%253A0%252C%2522score%2522%253A%252232%2522%252C%2522announcementPoint%2522%253A%25220%2522%252C%2522bidSubscribe%2522%253A%2522-1%2522%252C%2522vipManager%2522%253A%25220%2522%252C%2522onum%2522%253A%25220%2522%252C%2522monitorUnreadCount%2522%253A%252216%2522%252C%2522discussCommendCount%2522%253A%25220%2522%252C%2522showPost%2522%253Anull%252C%2522claimPoint%2522%253A%25220%2522%252C%2522token%2522%253A%2522eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxNTUxOTU0MDgyNSIsImlhdCI6MTU5Njc2ODkxMCwiZXhwIjoxNjI4MzA0OTEwfQ.YIMycf-IugDd1cEZZc3Oz0Sm-9TIOjpC-pHksUbsXVRvs8_DZcHQyLwgGnbDFrmbY8LeDfMbXhBKpyxNGvi_yQ%2522%252C%2522schoolAuthStatus%2522%253A%25222%2522%252C%2522scoreUnit%2522%253A%2522%2522%252C%2522redPoint%2522%253A%25220%2522%252C%2522myTidings%2522%253A%25220%2522%252C%2522companyAuthStatus%2522%253A%25222%2522%252C%2522myAnswerCount%2522%253A%25220%2522%252C%2522myQuestionCount%2522%253A%25220%2522%252C%2522signUp%2522%253A%25220%2522%252C%2522privateMessagePointWeb%2522%253A%25220%2522%252C%2522nickname%2522%253A%2522%25E9%2598%25BF%25E8%25AF%25BA%25C2%25B7%25E6%2596%25BD%25E7%2593%25A6%25E8%25BE%259B%25E6%25A0%25BC%2522%252C%2522privateMessagePoint%2522%253A%25220%2522%252C%2522bossS'
            r'tatus%2522%253A%25222%2522%252C%2522isClaim%2522%253A%25220%2522%252C%2522yellowDiamondEndTime%2522%253A%25220%2522%252C%2522yellowDiamondStatus%2522%253A%2522-1%2522%252C%2522pleaseAnswerCount%2522%253A%25220%2522%252C%2522vnum%2522%253A%25220%2522%252C%2522bizCardUnread%2522%253A%25220%2522%252C%2522mobile%2522%253A%252215519540825%2522%257D; auth_token=eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxNTUxOTU0MDgyNSIsImlhdCI6MTU5Njc2ODkxMCwiZXhwIjoxNjI4MzA0OTEwfQ.YIMycf-IugDd1cEZZc3Oz0Sm-9TIOjpC-pHksUbsXVRvs8_DZcHQyLwgGnbDFrmbY8LeDfMbXhBKpyxNGvi_yQ; token=59de0e12318f4fae80a400631835ee8c; _utm=c8133ed5adef4f958152d5cc669aaf91; cloud_token=239e162145f74549904e057b32d3b607; cloud_utm=3aef726f01594fb98e8ab0744968fcd8; Hm_lpvt_e92c8d65d92d534b0fc290df538b4758=1596875886'
        ),
    }
    # requests has no default timeout; without one a stalled server would
    # block this call forever.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error or block page as if
    # it were the company page.
    response.raise_for_status()

    # Pass the raw bytes so BeautifulSoup detects the charset itself rather
    # than relying on the encoding requests guesses from the HTTP headers.
    soup = BeautifulSoup(response.content, 'lxml')
    print(soup)
    return soup

if __name__ == '__main__':
    # Script entry point: fetch and dump a single company detail page,
    # identified by the numeric company id at the end of the URL.
    company_page = 'https://www.tianyancha.com/company/2353004954'
    get_company_info(company_page)

 

  • 写回答

1条回答 默认 最新

  • 考古学家lx(李玺) python领域优质创作者 2020-11-09 16:15
    关注

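    先确认请求返回的页面是否正常(例如打印 response.status_code 和 response.text 的开头,检查是否为乱码、登录页或验证页);如果页面本身正常但解析结果不对,再尝试换用其他解析库(如 html.parser 或 html5lib)。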

    评论

报告相同问题?

悬赏问题

  • ¥60 版本过低apk如何修改可以兼容新的安卓系统
  • ¥25 由IPR导致的DRIVER_POWER_STATE_FAILURE蓝屏
  • ¥50 有数据,怎么建立模型求影响全要素生产率的因素
  • ¥50 有数据,怎么用matlab求全要素生产率
  • ¥15 TI的insta-spin例程
  • ¥15 完成下列问题完成下列问题
  • ¥15 C#算法问题, 不知道怎么处理这个数据的转换
  • ¥15 YoloV5 第三方库的版本对照问题
  • ¥15 请完成下列相关问题!
  • ¥15 drone 推送镜像时候 purge: true 推送完毕后没有删除对应的镜像,手动拷贝到服务器执行结果正确在样才能让指令自动执行成功删除对应镜像,如何解决?