ilovepythonuu 2022-02-19 08:16 · acceptance rate: 90.5%
61 views
Closed

How can a Python program crawl URLs that appear in the Network panel but not in the page source, without inspecting them manually?

As the title says: how can a program collect the URLs that show up in the browser's Network panel but are absent from the page's HTML source, i.e. URLs (or other content) loaded asynchronously via XHR? Thanks.
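One programmatic way to capture those XHR URLs is to read Chrome's DevTools network events through Selenium's performance log. Below is a minimal sketch, assuming Selenium 4 and Chrome; the target URL and wait time are placeholders:

    import json
    import time
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options

    options = Options()
    options.add_argument('--headless')
    # ask Chrome to record DevTools network events in the performance log
    options.set_capability('goog:loggingPrefs', {'performance': 'ALL'})
    driver = webdriver.Chrome(options=options)

    driver.get('https://example.com')  # placeholder: the page to inspect
    time.sleep(5)                      # give asynchronous XHR requests time to fire

    xhr_urls = set()
    for entry in driver.get_log('performance'):
        msg = json.loads(entry['message'])['message']
        # Network.requestWillBeSent fires once for every outgoing request
        if msg.get('method') == 'Network.requestWillBeSent':
            if msg['params'].get('type') == 'XHR':
                xhr_urls.add(msg['params']['request']['url'])
    driver.quit()
    print(xhr_urls)

Tools such as selenium-wire or mitmproxy can capture the same traffic if the performance log is not convenient.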



3 answers

  • piaoyiren 2022-02-19 15:35
    
    import datetime
    import random
    import time
    import re
    from selenium.webdriver.chrome.options import Options
    from selenium import webdriver
    import pymongo
    from lxml import html
    import requests
    from pyquery import PyQuery as pq
    # Connect to MongoDB (no username/password)
    client = pymongo.MongoClient('localhost', 27017)
    # Create/select the database
    shidai = client['gongyuan']
    # The collection is only actually created once a document is inserted
    comments = shidai['comments']
    
    path_one = r'C:\chromedriver.exe'
    
    COOKIES = '_lxsdk_cuid=16a3e5550cac8-0328ac989f3a72-3c644d0e-100200-16a3e5550cbc8; _lxsdk=16a3e5550cac8-0328ac989f3a72-3c644d0e-100200-16a3e5550cbc8; _hc.v=b108378a-8f67-0f82-24be-f6bd59936218.1555823941; s_ViewType=10; ua=zeroing; ctu=66a794ac79d236ecce433a9dd7bbb8bf29eff0bc049590703a72f844379eb7c5; dper=56648ebad0a12bed853d89482e9f3c35c89ef2504f07d5388fd0dfead6018398ae8c14a81efb6f9e42cb7e1f46473489252facff635921c09c106e3b36b311bafcd118a3e618fff67b5758b9bd5afca901c01dc9ec74027240ac50819479e9fc; ll=7fd06e815b796be3df069dec7836c3df; _lx_utm=utm_source%3Dgoogle%26utm_medium%3Dorganic; cy=2; cye=beijing; _lxsdk_s=16b84e44244-3d8-afd-795%7C1393851569%7C2'
    f = open('C:/image/cehsi.txt', 'wb+')
    
    
    class DianpingComment:
        font_size = 14  # glyph width in the obfuscation SVG, in px
        start_y = 23    # baseline offset used to align CSS y-offsets with SVG rows
    
        def __init__(self, shop_id, cookies, delay=7, handle_ban=True, comments=comments):
            self.shop_id = shop_id
            self._delay = delay
            self.num = 1
            self.db = comments
            self._cookies = self._format_cookies(cookies)  # parse the cookie string into a dict
            self._css_headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
            }
            self._default_headers = {
                'Connection': 'keep-alive',
                'Host': 'www.dianping.com',
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
                'Cookie': '_lxsdk_cuid=16beb593744c8-082d3569f1b8da-e343166-100200-16beb593745c8; _lxsdk=16beb593744c8-082d3569f1b8da-e343166-100200-16beb593745c8; _hc.v=ead7aff3-40db-cb98-55ad-5460a0d10d6b.1563021622; s_ViewType=10; ua=zeroing; ctu=66a794ac79d236ecce433a9dd7bbb8bfac5ea81a9b7f2bdd8fe4eebbf54d3360; cy=169; cye=xuchang; dper=56cacd1d2e3f2645cfb85b48c96050d14127f349ac745cbe31b284282d72cf8960cfac5e2905d189386b038519f242d87f018031896f95f41ea215722b177d0d6619908c98d99eac35b14c560bc15035e0dc1d79e6dafff624d52dbb63d82db9; ll=7fd06e815b796be3df069dec7836c3df; uamo=13243174991; _lxsdk_s=16cbdc7eed1-542-97e-b28%7C%7C664'}
            self._cur_request_url = 'http://www.dianping.com/shop/{}/review_all'.format(self.shop_id)
            self.sub_url = 'http://www.dianping.com'
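            # Note: two header sets are deliberate. _default_headers (with Host and
            # Cookie) is for pages on www.dianping.com; the minimal _css_headers is
            # for the static CSS/SVG resources, which may be served from another host.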
    
        def run(self):
            self._css_link = self._get_css_link(self._cur_request_url)  # fetch the review page, find the CSS link
            self._font_dict = self._get_font_dict(self._css_link)       # build the class-name -> character map
            self._get_comment_page()

        def _get_css_link(self, url):
            """
                请求评论首页,获取css样式文件
            """
            try:
                print(url)
                res = requests.get(url, headers=self._default_headers, cookies=self._cookies)
                html = res.text
                css_link = re.search(r'<link re.*?css.*?href="(.*?svgtextcss.*?)">', html)
                print(css_link)
                assert css_link
                css_link = 'http:' + css_link[1]
                return css_link
            except Exception:
                return None

        def _get_font_dict(self, url):
            """
                获取css样式对应文字的字典
            """
            res = requests.get(url, headers=self._css_headers)
            html = res.text
    
            background_image_link = re.findall(r'background-image:.*?\((.*?svg)\)', html)
            print(background_image_link)
            background_image_link_list = []
            for i in background_image_link:
                url = 'http:' + i
                background_image_link_list.append(url)
    
            print(background_image_link_list)
    
            html = re.sub(r'span.*?\}', '', html)
            group_offset_list = re.findall(r'\.([a-zA-Z0-9]{5,6}).*?round:(.*?)px (.*?)px;', html)
            # merge the offset dicts from all SVG files into one
            font_dict_by_offset_list = {}
            for i in background_image_link_list:
                font_dict_by_offset_list.update(self._get_font_dict_by_offset(i))
    
            font_dict_by_offset = font_dict_by_offset_list
            print(font_dict_by_offset)
            font_dict = {}
            for class_name, x_offset, y_offset in group_offset_list:
                x_offset = x_offset.replace('.0', '')
                y_offset = y_offset.replace('.0', '')
                try:
                    font_dict[class_name] = font_dict_by_offset[int(y_offset)][int(x_offset)]
                    print("font_dict:", font_dict)
                except (KeyError, ValueError):
                    font_dict[class_name] = ''
            return font_dict
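
        # Hypothetical example of the decoding above: a CSS rule
        #   .abcde { background:-14.0px -15.0px; }
        # plus an SVG row at d="M0 38" containing "测试文字" yields
        # y_offset = 23 - 38 = -15 and x key -14 (the 2nd glyph, j=1),
        # so font_dict['abcde'] == '试'.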
        def _get_font_dict_by_offset(self, url):
            """
                获取坐标偏移的文字字典, 会有最少两种形式的svg文件(目前只遇到两种)
            """
            res = requests.get(url, headers=self._css_headers)
            html = res.text
            font_dict = {}
            y_list = re.findall(r'd="M0 (\d+?) ', html)
            if y_list:
                font_list = re.findall(r'<textPath .*?>(.*?)<', html)
                for i, string in enumerate(font_list):
                    y_offset = self.start_y - int(y_list[i])
    
                    sub_font_dict = {}
                    for j, font in enumerate(string):
                        x_offset = -j * self.font_size
                        sub_font_dict[x_offset] = font
                    font_dict[y_offset] = sub_font_dict
            else:
                font_list = re.findall(r'<text.*?y="(.*?)">(.*?)<', html)
                for y, string in font_list:
                    y_offset = self.start_y - int(y)
                    sub_font_dict = {}
                    for j, font in enumerate(string):
                        x_offset = -j * self.font_size
                        sub_font_dict[x_offset] = font
                    font_dict[y_offset] = sub_font_dict
            return font_dict
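
        # The two SVG layouts handled above look roughly like (hypothetical markup):
        #   <path d="M0 38 H600"/><textPath ...>测试文字...</textPath>   (textPath form)
        #   <text x="0" y="38">测试文字...</text>                        (plain text form)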
    
        def _get_comment_page(self):
            """
                Request each review page and replace the <svgmtsi></svgmtsi> placeholders with real characters.
            """
            while self._cur_request_url:
                self._delay_func()
                print('[{now_time}] {msg}'.format(now_time=datetime.datetime.now(), msg=self._cur_request_url))
                res = requests.get(self._cur_request_url, headers=self._default_headers, cookies=self._cookies)
                while res.status_code != 200:
                    # retry with a freshly parsed cookie dict; with a pool of cookie
                    # strings one could random.choice() among them (only one is defined here)
                    cookies = self._format_cookies(COOKIES)
                    res = requests.get(self._cur_request_url, headers=self._default_headers, cookies=cookies)
                html = res.text
                class_set = set(re.findall(r'<svgmtsi class="([a-zA-Z0-9]{5,6})"></svgmtsi>', html))
                for class_name in class_set:
                    try:
                        html = re.sub('<svgmtsi class="%s"></svgmtsi>' % class_name, self._font_dict[class_name], html)
                        print('replaced class {} with "{}"'.format(class_name, self._font_dict[class_name]))
                    except KeyError:
                        html = re.sub('<svgmtsi class="%s"></svgmtsi>' % class_name, '', html)
                        print('no mapping for class {}; stripped it'.format(class_name))
                doc = pq(html)
                self._parse_comment_page(html)
                next_href = doc('.NextPage').attr('href')
                if next_href:
                    self._default_headers['Referer'] = self._cur_request_url
                    next_page_url = self.sub_url + str(next_href)
                else:
                    next_page_url = None
                print('next_page_url: {}'.format(next_page_url))
                self._cur_request_url = next_page_url
    
        def _delay_func(self):
            # sleep a random duration in [delay-2, delay+2] seconds (0.1 s steps)
            delay_time = random.randint((self._delay - 2) * 10, (self._delay + 2) * 10) * 0.1
            time.sleep(delay_time)
    
        def _init_browser(self):
            """
                初始化游览器
            """
            chrome_options = Options()
            chrome_options.add_argument('--headless')
            chrome_options.add_argument('--disable-gpu')
            browser = webdriver.Chrome(chrome_options=chrome_options, executable_path=path_one)
            browser.get(self._cur_request_url)
            for name, value in self._cookies.items():
                browser.add_cookie({'name': name, 'value': value})
            browser.refresh()
            return browser
    
        def _handle_ban(self):
            """
                爬取速度过快,出现异常时处理验证
            """
            try:
                self._browser.refresh()
                time.sleep(1)
                button = self._browser.find_element_by_id('yodaBox')
                move_x_offset = self._browser.find_element_by_id('yodaBoxWrapper').size['width']
                webdriver.ActionChains(self._browser).drag_and_drop_by_offset(
                    button, move_x_offset, 0).perform()
            except Exception:
                pass
    
        def _format_cookies(self, cookies):
            """
                Parse a raw cookie header string into a dict.
            """
            # split('=', 1) keeps '=' characters inside cookie values intact
            cookies = {cookie.split('=', 1)[0]: cookie.split('=', 1)[1]
                       for cookie in cookies.replace(' ', '').split(';')}
            return cookies
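
        # e.g. _format_cookies('a=1; b=2') returns {'a': '1', 'b': '2'}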
    
    
        def _data_pipeline(self, data):
            """
                处理数据
            """
            print(data)
    
        def _parse_comment_page(self, html):
            """
                解析评论页并提取数据,把数据写入文件中;;
            """
            doc = pq(html)
            for li in doc('div.review-list-main > div.reviews-wrapper > div.reviews-items > ul > li'):
    
                doc_text = pq(li)
                # empty strings become None so missing fields are explicit
                name = doc_text('.dper-info .name').text() or None
                star = doc_text('.review-rank .sml-rank-stars').attr('class')
                date_time = doc_text('div.misc-info.clearfix > .time').text() or None
                comment = doc_text('.main-review .review-words').text() or None
    
                data = {
                    'name': name,
                    'date_time': date_time,
                    'star': star,
                    'comment': comment
                }
                print(data)
                f.write(str(data).encode('utf-8'))
                print('record written:', data)


    class Customer(DianpingComment):
        def _data_pipeline(self, data):
            print(data)
    
    
    if __name__ == "__main__":
        dianping = Customer('4114867', cookies=COOKIES)
        dianping.run()
        f.close()
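
A general note on how this answers the question: the review pages here are fetched directly with requests, and the anti-scraping piece (the svgtextcss stylesheet and SVG glyph files) is likewise downloaded as plain HTTP resources, even though none of it appears in the page's visible text. Once a hidden resource or XHR endpoint has been identified in the Network panel (or with a sketch like the one under the question), it can usually be requested directly. A minimal sketch with a hypothetical endpoint:

    import requests

    # hypothetical endpoint discovered in the Network panel
    resp = requests.get(
        'https://example.com/api/reviews',
        params={'page': 1},
        headers={'User-Agent': 'Mozilla/5.0',
                 'Referer': 'https://example.com/'},
    )
    print(resp.json())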
    
    
    
    Accepted by the asker as the best answer.


Question events

  • Closed by the system on Mar 1
  • Answer accepted on Feb 21
  • Question created on Feb 19
