用 Python 爬取天眼查详情页,并使用 bs4 解析页面,但解析结果不正确,我该怎么办?

import requests
import pandas as pd
import lxml
import sys
from bs4 import BeautifulSoup
import time
import urllib
import cx_Oracle
import random
def get_company_info(url):
    """Fetch a company detail page and parse it with BeautifulSoup.

    The page is requested with browser-like headers and a logged-in session
    cookie, parsed with the ``lxml`` parser, printed to stdout and returned.

    Args:
        url: Absolute URL of the detail page. It is also sent as the
            ``Referer`` header.

    Returns:
        The ``BeautifulSoup`` document built from the response body.

    Raises:
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status.
        requests.exceptions.RequestException: On connection errors or when
            the request times out.
    """
    headers = {
        'Host': 'www.tianyancha.com',
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': r'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Referer': url,  # e.g. 'https://www.tianyancha.com/search?key=%E5%B1%B1%E4%B8%9C%20%E7%A7%91%E6%8A%80'
        # 'Accept-Encoding' is deliberately NOT set here.  Advertising 'br'
        # makes the server answer with Brotli, which Requests can only decode
        # when a Brotli library is installed; otherwise response.text is
        # compressed garbage and the parsed page looks wrong.  Leaving the
        # header out lets Requests advertise exactly what it can decode.
        'Accept-Language': 'zh-CN,zh;q=0.9',
        # NOTE(review): hard-coded session cookie carrying account
        # credentials; it should be loaded from configuration or the
        # environment instead of living in source control.
        # The value is written as two adjacent literals because the original
        # single literal was broken across two lines (a syntax error).
        'Cookie': (
            r'TYCID=63830870fbcd11e98ce4453b01823d7c; undefined=63830870fbcd11e98ce4453b01823d7c; ssuid=5981952400; _ga=GA1.2.1810217122.1572519518; jsid=SEM-BAIDU-PZ0703-VIP-000001; tyc-user-phone=%255B%252215519540825%2522%255D; RTYCID=ffb3c7300dae48ee84b62236fcb07b5a; CT_TYCID=4892acecf4a5457aad309f1ffbfaea83; aliyungf_tc=AQAAACrF4T6wIwcA1xZ4atN0uO+itPnS; csrfToken=xMOjh1bOrZyXDmgm8xlm8v5B; bannerFlag=false; _gid=GA1.2.927901130.1596874364; Hm_lvt_e92c8d65d92d534b0fc290df538b4758=1596874364,1596875281,1596875446,1596875842; tyc-user-info=%257B%2522claimEditPoint%2522%253A%25220%2522%252C%2522vipToMonth%2522%253A%2522false%2522%252C%2522explainPoint%2522%253A%25220%2522%252C%2522personalClaimType%2522%253A%2522none%2522%252C%2522integrity%2522%253A%252210%2525%2522%252C%2522state%2522%253A0%252C%2522score%2522%253A%252232%2522%252C%2522announcementPoint%2522%253A%25220%2522%252C%2522bidSubscribe%2522%253A%2522-1%2522%252C%2522vipManager%2522%253A%25220%2522%252C%2522onum%2522%253A%25220%2522%252C%2522monitorUnreadCount%2522%253A%252216%2522%252C%2522discussCommendCount%2522%253A%25220%2522%252C%2522showPost%2522%253Anull%252C%2522claimPoint%2522%253A%25220%2522%252C%2522token%2522%253A%2522eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxNTUxOTU0MDgyNSIsImlhdCI6MTU5Njc2ODkxMCwiZXhwIjoxNjI4MzA0OTEwfQ.YIMycf-IugDd1cEZZc3Oz0Sm-9TIOjpC-pHksUbsXVRvs8_DZcHQyLwgGnbDFrmbY8LeDfMbXhBKpyxNGvi_yQ%2522%252C%2522schoolAuthStatus%2522%253A%25222%2522%252C%2522scoreUnit%2522%253A%2522%2522%252C%2522redPoint%2522%253A%25220%2522%252C%2522myTidings%2522%253A%25220%2522%252C%2522companyAuthStatus%2522%253A%25222%2522%252C%2522myAnswerCount%2522%253A%25220%2522%252C%2522myQuestionCount%2522%253A%25220%2522%252C%2522signUp%2522%253A%25220%2522%252C%2522privateMessagePointWeb%2522%253A%25220%2522%252C%2522nickname%2522%253A%2522%25E9%2598%25BF%25E8%25AF%25BA%25C2%25B7%25E6%2596%25BD%25E7%2593%25A6%25E8%25BE%259B%25E6%25A0%25BC%2522%252C%2522privateMessagePoint%2522%253A%25220%2522%252C%2522bossS'
            r'tatus%2522%253A%25222%2522%252C%2522isClaim%2522%253A%25220%2522%252C%2522yellowDiamondEndTime%2522%253A%25220%2522%252C%2522yellowDiamondStatus%2522%253A%2522-1%2522%252C%2522pleaseAnswerCount%2522%253A%25220%2522%252C%2522vnum%2522%253A%25220%2522%252C%2522bizCardUnread%2522%253A%25220%2522%252C%2522mobile%2522%253A%252215519540825%2522%257D; auth_token=eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxNTUxOTU0MDgyNSIsImlhdCI6MTU5Njc2ODkxMCwiZXhwIjoxNjI4MzA0OTEwfQ.YIMycf-IugDd1cEZZc3Oz0Sm-9TIOjpC-pHksUbsXVRvs8_DZcHQyLwgGnbDFrmbY8LeDfMbXhBKpyxNGvi_yQ; token=59de0e12318f4fae80a400631835ee8c; _utm=c8133ed5adef4f958152d5cc669aaf91; cloud_token=239e162145f74549904e057b32d3b607; cloud_utm=3aef726f01594fb98e8ab0744968fcd8; Hm_lpvt_e92c8d65d92d534b0fc290df538b4758=1596875886'
        ),
    }
    # A timeout keeps the call from hanging forever on a stalled connection.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page as if it were
    # the company page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'lxml')
    print(soup)
    # Hand the parsed document back so callers can extract fields from it.
    return soup

# Script entry point: fetch and print one hard-coded company detail page.
if __name__ == '__main__':
    url = 'https://www.tianyancha.com/company/2353004954'
    get_company_info(url=url)

 

查看全部
weixin_40718782
weixin_40718782
2020/11/08 20:32
  • python
  • 点赞
  • 收藏
  • 回答
    私信

1个回复