Magic丶Kid 2024-04-19 11:27 采纳率: 50%
浏览 7
已结题

python 网页内容解析

<link href="/statics/modules/channel/channel.css" rel="stylesheet">
<div class="row" id="feedApp">
    <div class="col-md-8 dashboard">
                XXX  
    </div>
</div>
<script src="//www.tedschool.cn/statics/js/feeds.js" type="text/javascript"></script>
<script src="/statics/modules/channel/channel.js" type="text/javascript"></script>
<script type="text/javascript">
    /**
     * Build today's date (local time) as a Chinese-formatted string,
     * e.g. "2024年4月19日". Month and day are not zero-padded.
     *
     * @returns {string} The formatted date.
     */
    function getNowDate() {
        const today = new Date();
        const parts = [
            today.getFullYear() + "年",
            (today.getMonth() + 1) + "月", // getMonth() is zero-based
            today.getDate() + "日"
        ];
        return parts.join("");
    }


    /**
     * Search the feeds of the current channel and show the result in the Vue
     * instance `vm`.
     *
     * The search word is read from the `channelSearchWord` input and sent to
     * `/feeds/channels/searchFeeds`. On success every returned feed is
     * normalised (`jsoninfo`, `attachments`, `fromuid`, `showTitle`,
     * `roleTitle`) and the words in `ret.data.splitwords` are highlighted in
     * the feed title and in the `title`/`content` of each `jsoninfo` entry.
     * Page 1 replaces `vm.feedlist`; later pages are appended to it.
     *
     * NOTE(review): the highlighted strings contain markup and are presumably
     * rendered as HTML; the feed text itself is not HTML-escaped here.
     *
     * @param {number} [page=1] Result page to request.
     */
    function doChannelSearch(page=1){
        const HIGHLIGHT_OPEN = '<span style="color:red;">';
        const HIGHLIGHT_CLOSE = '</span>';

        // Escape regex metacharacters so a word such as "c++" is matched literally.
        const escapeRegExp = text => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

        // Build ONE case-insensitive regex for all words (longest first), or
        // null when there is nothing to highlight. A single pass guarantees
        // that a later word can never match inside the markup inserted for an
        // earlier one.
        const buildHighlightRegex = words => {
            if (!Array.isArray(words)) {
                return null;
            }
            const literals = words
                .filter(w => w !== null && w !== undefined)
                .map(w => String(w))
                .filter(w => w.length > 0) // an empty pattern would match everywhere
                .sort((a, b) => b.length - a.length)
                .map(escapeRegExp);
            return literals.length > 0 ? new RegExp(literals.join('|'), 'gi') : null;
        };

        // Wrap every match in the highlight span; non-string values pass through.
        const highlight = (text, regex) => {
            if (regex === null || typeof text !== 'string') {
                return text;
            }
            return text.replace(regex, match => HIGHLIGHT_OPEN + match + HIGHLIGHT_CLOSE);
        };

        // Accept attachments that are already an object as well as a JSON
        // string; anything unusable falls back to the empty lists.
        const normalizeAttachments = raw => {
            if (raw !== null && typeof raw === 'object') {
                return raw;
            }
            try {
                const parsed = JSON.parse(raw);
                if (parsed !== null && typeof parsed === 'object') {
                    return parsed;
                }
            } catch (error) {
                // Deliberate best effort: fall through to the default below.
            }
            return {
                'imageList': [],
                'videoList':[],
                'audioList': []
            };
        };

        const href = "/feeds/channels/searchFeeds?channel="+'3285'+"&gename="+'shq2raj1'+"&page="+page;
        const word = $("input[name='channelSearchWord']").val();
        // Encode the user's text so "&", "+", "#", spaces ... survive the query string.
        const url = href+"&word="+encodeURIComponent(word === null || word === undefined ? '' : word);

        $.ajax({
            url: url,
            dataType: 'json',
            success: ret => {
                if(!(ret.statusCode < 300)){
                    swal("提示", ret.message, "warning");
                    return;
                }

                const feeds = ret.data.feeds;
                // Sort the search keywords from longest to shortest.
                const highlightRegex = buildHighlightRegex(sortByWordLengthDesc(ret.data.splitwords));

                for(const key of Object.keys(feeds)){
                    const feed = feeds[key];
                    feed['showTitle'] = 0;

                    try{
                        feed['jsoninfo'] = JSON.parse(feed['moreinfo']);
                    }catch(error){
                        // Unparsable detail info is treated as "no entries".
                        feed['jsoninfo'] = [];
                    }

                    // Highlight the search keywords in the feed title ...
                    feed['title'] = highlight(feed['title'], highlightRegex);

                    // ... and in the title/content of every detail entry.
                    const info = feed['jsoninfo'];
                    if(info !== null && typeof info === 'object'){
                        for(const entryKey of Object.keys(info)){
                            const entry = info[entryKey];
                            if(entry === null || typeof entry !== 'object'){
                                continue;
                            }
                            if(typeof entry['title'] != 'undefined'){
                                entry['title'] = highlight(entry['title'], highlightRegex);
                            }
                            if(typeof entry['content'] != 'undefined'){
                                entry['content'] = highlight(entry['content'], highlightRegex);
                            }
                        }
                    }

                    feed['fromuid'] = feed['uid'];
                    feed['attachments'] = normalizeAttachments(feed['attachments']);
                    feed['roleTitle'] = false;
                }

                if(page > 1){
                    vm.feedlist = vm.feedlist.concat(feeds);
                }else{
                    vm.feedlist = feeds;
                }

                vm.searchType = true;

                // Twenty or more results means another page may be available.
                vm.showSearchMore = feeds.length >= 20;
            }
        });
    }

    /**
     * Extension of Date: render the date (local time) according to a pattern.
     *
     * Month (M), day (d), hour (h), minute (m), second (s) and quarter (q)
     * accept one or two placeholder letters; two letters zero-pad the value
     * to two digits. Year (y) accepts one to four letters and keeps that many
     * trailing digits of the year. Millisecond (S) is a single letter that
     * prints the 1-3 digit value. Only the first run of each letter is
     * replaced.
     *
     * Examples:
     *   (new Date()).format("yyyy-MM-dd hh:mm:ss.S") ==> 2006-07-02 08:09:04.423
     *   (new Date()).format("yyyy-M-d h:m:s.S")      ==> 2006-7-2 8:9:4.18
     *
     * @param {string} fmt Pattern containing the placeholder letters above.
     * @returns {string} The pattern with its placeholders substituted.
     */
    Date.prototype.format = function (fmt) {
        const fields = {
            "M+": this.getMonth() + 1, // month (getMonth() is zero-based)
            "d+": this.getDate(), // day of month
            "h+": this.getHours(), // hour
            "m+": this.getMinutes(), // minute
            "s+": this.getSeconds(), // second
            "q+": Math.floor((this.getMonth() + 3) / 3), // quarter
            "S": this.getMilliseconds() // millisecond
        };
        let result = fmt;

        // Use the match result directly instead of the deprecated RegExp.$1 statics.
        const yearToken = /(y+)/.exec(result);
        if (yearToken) {
            const yearText = String(this.getFullYear()).slice(4 - yearToken[1].length);
            result = result.replace(yearToken[1], () => yearText);
        }

        for (const pattern of Object.keys(fields)) {
            const token = new RegExp("(" + pattern + ")").exec(result);
            if (!token) {
                continue;
            }
            const value = String(fields[pattern]);
            // One letter prints the raw value; more letters pad to two digits.
            const text = token[1].length === 1 ? value : ("00" + value).slice(value.length);
            result = result.replace(token[1], () => text);
        }
        return result;
    };

    /**
     * Check whether the user is late (after 09:30 local time) and, if so,
     * offer to file a make-up check-in.
     *
     * Must be called with `this` bound to the link element (for example as a
     * jQuery event handler). When the user confirms and answers both prompts,
     * `repairtime` and `repairinfo` are appended to the link's `href` as
     * URL-encoded query parameters. Cancelling the confirmation or either
     * prompt leaves the link untouched.
     *
     * Relies on `Date.prototype.format` being defined elsewhere on the page.
     */
    function checklater() {
        // Take a single snapshot so hour and minute belong to the same instant.
        const now = new Date();
        const hour = now.getHours();
        const minute = now.getMinutes();

        const isLate = hour > 9 || (hour === 9 && minute > 30);
        if (!isLate) {
            return;
        }

        const href = $(this).attr("href");
        if (typeof href !== "string") {
            return; // nothing to extend
        }

        if (!confirm("要补打卡?")) {
            return;
        }

        const today = now.format("yyyy-MM-dd");
        const repairtime = prompt("实际到岗的时间是?", today + " 09:00:00");
        if (repairtime === null) {
            return; // prompt cancelled: do not write "null" into the URL
        }
        const repairinfo = prompt("补打卡的理由是?", "忘记打卡");
        if (repairinfo === null) {
            return;
        }

        // Use "&" when the link already carries a query string.
        const separator = href.indexOf("?") === -1 ? "?" : "&";
        const query = "repairtime=" + encodeURIComponent(repairtime) +
            "&repairinfo=" + encodeURIComponent(repairinfo);

        $(this).attr('href', href + separator + query);
    }

    var vm = new Vue({
        provide() {
            return {
                uid: '777868'
            };
        },
        components: {
            TedChannelMomentPro: ChannelModule.components.TedChannelMomentPro
        },
        data() {
            return {
                phpData: {
                    channelUname: "测试"
                },
                channels,
                com: {
                    gename: "sh31aj1",
                    gid: "9843",
                    channel: channel,
                    webdomain: "https://www.tedschool.cn"
                },
                title: '',
                nowtime: "1713496105",
                uid: "777868",
                nickTitle: {"777868":"\u5b50","123123":"\u5b32",},
                feedlist: [{"id":"1270866","faid":"0","gid":"3614","uid":"2551273","username":"13850559392#053a07","type":"userFeed","infotype":"info","title":"大病初愈后体悟到的人生真谛","bookname":"早起诵读","images":"","info":"","moreinfo":"[{\"feedtype\":\"4\",\"value\":0,\"title\":\"大病初愈后体悟到的人生真谛\",\"content\":\"模板\\n【组 别】 第x组\\n【分享人】 xxx\\n【分享时间】2024年04月19日(周五)\\n【文章名称】《活法》第一章第四节\\n大病初愈后体悟到的人生真谛\\n\\n知−−−《金句摘录》:\\nxxx\\n\\n觉−−−【案例感悟】:\\nxxx\\n\\n行−−−【行动计划】:(5W2H)\\nxxx\",\"dotime\":\"2024-04-19 07:24:11\"}]","attachments":{"imageList":[],"videoList":[],"audioList":[{"size":304559,"status":"success","duration":152,"path":"https://atts.tedschool.cn/edone/minic/4pwhbk6tfwfjczqmnd0uk_1713482274660.mp3","ext":"mp3","filename":"4pwhbk6tfwfjczqmnd0uk_1713482274660.mp3"},{"size":349797,"status":"success","duration":174,"path":"https://atts.tedschool.cn/edone/minic/h0z1bwtffcsfcwddx4w93_1713482452009.mp3","ext":"mp3","filename":"h0z1bwtffcsfcwddx4w93_1713482452009.mp3"},{"size":387813,"status":"success","duration":193,"path":"https://atts.tedschool.cn/edone/minic/0o2262wjcbpx9z7mart0p_1713482649084.mp3","ext":"mp3","filename":"0o2262wjcbpx9z7mart0p_1713482649084.mp3"}]},"tags":"","toid":"1270866","pename":"shq2raj1_dvukh","tolink":"1270133","to":"{\"gid\":3614,\"channel\":[\"3285\"],\"user\":[\"2551273\"],\"intype\":\"trian\",\"sendPush\":1}","tochannel":[3285],"touser":"2551273","tofollow":"","addtime":"2024-04-19 07:24:11","updatetime":"2024-04-19 07:24:11","likecount":"0","operating":"0","levelRequired":"0","repeatTime":"91次","starttime":null,"endtime":null,"finishedState":"0","recalltime":"0","recallstamp":"0","ItemPrize":"","hasBadWord":"0","publicpower":"2","istop":"0","ispublic":"0","status":"1","mpid":"1668189","cid":"100625","ismakeup":"0","timer":"541","starcount":"0","fid":"1270866","tocid":"3285","touid":"0","listorder":"0","tempstar":"0","pushTime":"2024-04-19 
07:24:11.304915","roleTitle":"组长","channels":[{"cid":"3285","name":"测试"}],"avatar":"/attachments/avatar2/avatar_2551273.jpg","jsoninfo":[{"feedtype":"4","value":0,"title":"大病初愈后体悟到的人生真谛","content":"模板\n【组 别】 第x组\n【分享人】 xxx\n【分享时间】2024年04月19日(周五)\n【文章名称】《活法》第一章第四节\n大病初愈后体悟到的人生真谛\n\n知−−−《金句摘录》:\nxxx\n\n觉−−−【案例感悟】:\nxxx\n\n行−−−【行动计划】:(5W2H)\nxxx","dotime":"2024-04-19 07:24:11"}],"fromname":"测试1","fromuid":"2551273","replyList":[],"creditList":[],"approve":[],"viptype":2,"vipexpire":"2025-01-06 09:33:07","showTitle":0,"isStar":0,"log_score":"0","log_reference_score":"0","log_final_score":"0","hasscore":1,"remark":""},{"id":"1270836","faid":"0","gid":"3614","uid":"2552666","username":"18960211517#379ed8","type":"userFeed","infotype":"info","title":"大病初愈后体悟到的人生真谛","bookname":"早起诵读","images":"","info":"","moreinfo":"[{\"feedtype\":\"4\",\"value\":0,\"title\":\"大病初愈后体悟到的人生真谛\",\"content\":\"模板\\n【组 别】 第x组\\n【分享人】 xxx\\n【分享时间】2024年04月19日(周五)\\n【文章名称】《活法》第一章第四节\\n大病初愈后体悟到的人生真谛\\n\\n知−−−《金句摘录》:\\nxxx\\n\\n觉−−−【案例感悟】:\\nxxx\\n\\n行−−−【行动计划】:(5W2H)\\nxxx\",\"dotime\":\"2024-04-19 
07:14:52\"}]","attachments":{"imageList":[],"videoList":[],"audioList":[{"size":591517,"duration":147,"status":"success","path":"https://atts.tedschool.cn/edone/minic/mqzto26tc6xpf65zxym97_1713481793186.mp3","ext":"mp3","filename":"mqzto26tc6xpf65zxym97_1713481793186.mp3"},{"size":565290,"duration":141,"status":"success","path":"https://atts.tedschool.cn/edone/minic/9vyig6z1v5sggnfucvb6a_1713481937058.mp3","ext":"mp3","filename":"9vyig6z1v5sggnfucvb6a_1713481937058.mp3"},{"size":597577,"duration":149,"status":"success","path":"https://atts.tedschool.cn/edone/minic/3v2bm09ecbld1wmjxlhq9_1713482090037.mp3","ext":"mp3","filename":"3v2bm09ecbld1wmjxlhq9_1713482090037.mp3"}]},"tags":"","toid":"1270836","pename":"shq2raj1_dvukh","tolink":"1270104","to":"{\"gid\":3614,\"channel\":[\"3285\"],\"user\":[\"2552666\"],\"intype\":\"trian\",\"sendPush\":1}","tochannel":[3285],"touser":"2552666","tofollow":"","addtime":"2024-04-19 07:14:52","updatetime":"2024-04-19 07:14:52","likecount":"0","operating":"0","levelRequired":"0","repeatTime":"88次","starttime":null,"endtime":null,"finishedState":"0","recalltime":"0","recallstamp":"0","ItemPrize":"","hasBadWord":"0","publicpower":"2","istop":"0","ispublic":"0","status":"1","mpid":"1668187","cid":"100625","ismakeup":"0","timer":"453","starcount":"0","fid":"1270836","tocid":"3285","touid":"0","listorder":"0","tempstar":"0","pushTime":"2024-04-19 07:14:52.104778","roleTitle":"","channels":[{"cid":"3285","name":"测试"}],"avatar":"/attachments/avatar2/avatar_2552666.jpg","jsoninfo":[{"feedtype":"4","value":0,"title":"大病初愈后体悟到的人生真谛","content":"模板\n【组 别】 第x组\n【分享人】 xxx\n【分享时间】2024年04月19日(周五)\n【文章名称】《活法》第一章第四节\n大病初愈后体悟到的人生真谛\n\n知−−−《金句摘录》:\nxxx\n\n觉−−−【案例感悟】:\nxxx\n\n行−−−【行动计划】:(5W2H)\nxxx","dotime":"2024-04-19 07:14:52"}],"fromname":"测试2","fromuid":"2552666","replyList":[],"creditList":[],"approve":[],"viptype":2,"vipexpire":"2025-02-04 
11:36:51","showTitle":0,"isStar":0,"log_score":"0","log_reference_score":"0","log_final_score":"0","hasscore":1,"remark":""}],
                searchType: false,
                showSearchMore: false,
                searchPage: 1
            };
        },
        methods: {
            replaceTextToHtml(v) {
                if (!v) {
                    return '';
                }
                return v.toString().replace(/(\r|\n|\r\n)/g, '<br>').replace(/\s/g, '&nbsp;');
            },
            replaceComment(v) {
                if (!v) {
                    return ''
                }
                return this.replaceTextToHtml(v).replace(/\[]/g, '<i class="i-icon i-icon-border"></i>').replace(/\[x]/g, '<i class="i-icon i-icon-check-square"></i>');
            },
            // Empty stub: does nothing in this page. Presumably kept so a
            // child component can call it without error — TODO confirm.
            navigateToViewContent(options) {

            },
            // Empty stub: does nothing in this page.
            navigateToPlayForm() {

            },
            doSearchMore(){
                this.searchPage += 1;
                doChannelSearch(this.searchPage);
            }
        },
        el: '#feedApp'
    });
    // Wire up the page once the DOM is ready: channel search, the quick-share
    // tabs and drop-downs, and the attachment picker.
    jQuery(document).ready(function () {
        //FeedCom.atwho("#tab_share .share-content",nickTitle);
        //FeedCom.atchannel("#tab_share .share-channel",channels);

        // Start a new search when Enter is released in the search box.
        $(".channel-search-input").on("keyup", function(event){
            if(event.key === 'Enter'){
                // A new search starts at page 1, so "load more" must continue
                // from page 2 instead of from the previous search's page.
                vm.searchPage = 1;
                doChannelSearch();
            }
        });

        // Default tab -> the currently selected quick-add tab.
        $("#tab_default a").click(function () {
            var activeId = $(".quick-add li.active a").attr('href');
            $("#tab_default").removeClass('active');
            $(activeId).addClass('active');
        });
        // Cancel: return to the default tab and reset the share type.
        $(".quick-cancel").click(function () {
            var activeId = $(".quick-add li.active a").attr('href');
            $("#tab_default").addClass('active');
            $(".quick-add li.active a").text('分享动态');
            $("#infotype .btn-view").text('分享动态');
            $("input[name=infotype]").val('info');
            $(activeId).removeClass('active');
        });
        // Open the share tab with the default share type.
        $("#info_default").click(function () {
            $(".quick-add li.active a").text('分享动态');
            $("#infotype .btn-view").text('分享动态');
            $("input[name=infotype]").val('info');
            $("#tab_default").removeClass('active');
            $("#tab_share").addClass('active');
        });
        // Share-type drop-down: mirror the choice into the labels and the hidden input.
        $("#infotype a").click(function () {
            var infotype = $(this).attr('value');
            $(".quick-add li.active a").text($(this).text());
            $("#infotype .btn-view").text($(this).text());
            $("input[name=infotype]").val(infotype);
        });
        // Visibility drop-down: mirror the choice into the label and the hidden input.
        $("#publicpower a").click(function () {
            var publicpower = $(this).attr('value');
            $("#publicpower .btn-view").text($(this).text());
            $("input[name=publicpower]").val(publicpower);
        });
        // Publish: upload the attachments first, which then posts the feed.
        $("#tab_share .add-share").click(function () {
            uploadAttachments();
        });

        // Attachment picker: remember the chosen files and preview them.
        $("#feed-attachments").change(function () {
            // Convert the FileList into a real array.
            var added = Array.from($("#feed-attachments")[0].files);
            var firstNewIndex = feed.fileList.length;
            feed.fileList = feed.fileList.concat(added);
            // Preview only the files added by this selection; previewing the
            // whole list again would duplicate the earlier entries.
            added.forEach(function (file, offset) {
                reviewFile(file, firstNewIndex + offset);
            });
        });

        // Remove an attachment from the pending list.
        $(".attachments-list").on('click', '.file-del', function () {
            var removed = parseInt($(this).attr('fid'), 10);
            if (isNaN(removed)) {
                return;
            }
            feed.fileList.splice(removed, 1);
            $(this).parent().remove();
            // Later files moved down one slot in feed.fileList; keep the fid
            // of the remaining rows in step so the next delete hits the right file.
            // NOTE(review): a preview whose FileReader has not finished yet is
            // appended afterwards with its original index — confirm whether
            // that race matters in practice.
            $(".attachments-list .file-del").each(function () {
                var fid = parseInt($(this).attr('fid'), 10);
                if (fid > removed) {
                    $(this).attr('fid', fid - 1);
                }
            });
        });

    });

    /**
     * Publish the feed composed in the quick-share form.
     *
     * Collects text, share type, visibility and target channel from the form,
     * adds the already uploaded `feed.attachmentsList` and POSTs everything
     * to `/feeds/postFeed`. On success the form is reset to its initial state
     * and the current tab is reloaded.
     */
    function postFeed() {
        const payload = {
            gename: gename,
            channel: channel,
            ptemplate: 'dshare',
            atchannel: $("#tab_share .share-channel").val(),
            infotype: $("input[name=infotype]").val(),
            publicpower: $("input[name=publicpower]").val(),
            attachmentsList: feed.attachmentsList,
            info: $("#tab_share .share-content").val()
        };

        // Clear the form and restore the default share type.
        const resetComposer = function () {
            $("#tab_share .share-content").val('');
            $(".quick-add li.active a").text('分享动态');
            $("#infotype .btn-view").text('分享动态');
            $("input[name=infotype]").val('info');
        };

        $.ajax({
            url: "/feeds/postFeed",
            type: "post",
            dataType: 'json',
            data: payload,
            beforeSend() {
                quickSending();
                $("#tab_share").removeClass('active');
            },
            success() {
                resetComposer();
                navTab.reloadCurrentTab();
                quickComplete();
            }
        });
    }

    /**
     * Upload the pending attachments, then publish the feed.
     *
     * With no pending files the feed is posted straight away. Otherwise all
     * files in `feed.fileList` are sent as `attachments[]` to the upload
     * endpoint; when the response reports status 200 its `data` is stored in
     * `feed.attachmentsList` and the feed is posted, any other status is only
     * logged to the console.
     */
    function uploadAttachments() {
        if (!(feed.fileList.length > 0)) {
            postFeed();
            return;
        }

        const formData = new FormData();
        for (const file of feed.fileList) {
            formData.append('attachments[]', file);
        }

        $.ajax({
            url: "/feeds/uploadFile/?gename=shq2raj1",
            type: 'post',
            data: formData,
            dataType: 'json',
            // Let the browser send the FormData untouched, with its own content type.
            processData: false,
            contentType: false,
            success: function (data) {
                if (data.statusCode != '200') {
                    console.log(data.message);
                    return;
                }
                feed.attachmentsList = data.data;
                postFeed();
            }
        });
    }

    /**
     * Append a preview row for a chosen file to `.attachments-list`.
     *
     * The file is read as a data URL; once loaded, a list item with the file
     * name and a delete control (carrying the file's index as `fid`) is
     * appended. JPEG, PNG and GIF images additionally get a thumbnail.
     *
     * @param {File} file The file to preview.
     * @param {number} i Index of the file, stored in the `fid` attribute.
     */
    function reviewFile(file, i) {
        // The file name comes from the user's machine: escape it before it is
        // placed into markup.
        const escapeHtml = function (value) {
            return String(value)
                .replace(/&/g, '&amp;')
                .replace(/</g, '&lt;')
                .replace(/>/g, '&gt;')
                .replace(/"/g, '&quot;')
                .replace(/'/g, '&#39;');
        };

        // A group, not a character class: "[jpeg|png|jpg|gif]" only tested the
        // first letter of the subtype and so accepted e.g. "image/pict".
        const hasThumbnail = /^image\/(jpeg|png|jpg|gif)$/.test(file.type);

        const reader = new FileReader();
        // Register the handler before starting the read; the result is
        // available on the reader once the load event fires.
        reader.onload = function () {
            const thumbnail = hasThumbnail
                ? '<img src="' + escapeHtml(reader.result) + '" width="50" alt="">'
                : '';
            const label = '<span class="file-name">' + escapeHtml(file.name) + '</span>' +
                '<span class="file-del" fid="' + escapeHtml(i) + '">删除</span>';
            $(".attachments-list").append('<li class="file-item">' + thumbnail + label + '</li>');
        };
        reader.readAsDataURL(file); // base64 data URL
    }
</script>
<script src="https://7n.tedschool.cn/statics/plugins/echarts/echarts.min.js" type="text/javascript"></script>

bs4获取到的html数据,怎么解析获取其中script内的data()里的nickTitle和feedlist数据

img

  • 写回答

4条回答 默认 最新

  • 关注

    上午好☀️☀️☀️️
    本答案参考ChatGPT-3.5

    根据提供的代码,可以看出这是一段由HTML和内嵌JavaScript(jQuery + Vue)组成的网页源码,而不是Python代码;问题是如何用Python对它进行解析。

    首先,代码中包含了一些HTML标签,例如<link><div><script>等。这些标签用来定义网页的样式和布局,以及引入JavaScript脚本。

    代码中最重要的部分是使用JavaScript和jQuery库进行网页的内容解析。具体来说,doChannelSearch()函数是一个用于搜索频道的函数,其中包含了对搜索结果进行处理的逻辑。

    doChannelSearch()函数中,通过发送Ajax请求获取搜索结果数据,然后对数据进行处理和展示。其中涉及到一个highlightAndReplace()函数,用于将搜索关键词进行高亮显示。

    根据代码的逻辑,可以总结出以下解决方案:

    1. 分析HTML标签,确定网页的结构和内容布局。
    2. 分析JavaScript代码,理解网页内容处理的逻辑。
    3. 确定需要解析的数据,例如搜索结果中的标题、内容等。
    4. 使用Python的网络爬虫库(例如Requests)获取网页内容。
    5. 使用Python的HTML解析库(例如BeautifulSoup)解析HTML标签,提取需要的数据。
    6. 使用Python的正则表达式库(例如re)对数据进行处理,例如高亮关键词等。
    7. 将解析后的数据进行存储或展示。

    需要注意的是,代码中可能存在一些依赖项,例如引入的外部JavaScript脚本、CSS样式文件等。在进行解析时,可能需要获取这些依赖项的内容,以确保解析的正确性。

    最终,根据解析结果,可以对网页的内容进行进一步的处理,例如存储到数据库、生成报告等。

    以下是修改后的代码示例:

    import requests
    from bs4 import BeautifulSoup
    import re
    
    # 获取网页内容
    url = 'http://example.com'
    response = requests.get(url)
    html_content = response.text
    
    # 使用BeautifulSoup解析HTML标签
    soup = BeautifulSoup(html_content, 'html.parser')
    
    # 提取需要的数据
    link_tag = soup.find('link', href="/statics/modules/channel/channel.css")
    div_tag = soup.find('div', class_="row", id="feedApp")
    script_tags = soup.find_all('script')
    
    # Fetch the search results (placeholder: the body was omitted in the answer).
    def doChannelSearch(page=1):
        """Placeholder for the search request.

        The original snippet left the body out, which is a syntax error in
        Python; fail loudly instead until a real implementation is supplied.
        """
        raise NotImplementedError("doChannelSearch body was omitted in the original snippet")
    
    # Highlight a keyword
    def highlightAndReplace(originalString, searchString):
        """Wrap every case-insensitive occurrence of searchString in a red <span>.

        The search string is matched literally (regex metacharacters are
        escaped); an empty search string returns the text unchanged.
        """
        if not searchString:
            return originalString
        regex = re.compile(re.escape(searchString), re.IGNORECASE)
        # Raw string: "\g<0>" is the whole match, and "\g" is not a valid
        # escape in a normal string literal.
        replacedString = regex.sub(r'<span style="color:red;">\g<0></span>', originalString)
        return replacedString
    

    在这段代码中,我们使用了Requests库和BeautifulSoup库来进行网页内容的获取和解析。通过调用相应的方法,可以提取出链接、<div>标签、<script>标签等。同时,我们也重新实现了highlightAndReplace()函数,使用正则表达式来高亮显示关键词。通过使用这些库和方法,我们可以对网页内容进行解析和处理。

    需要注意的是,示例代码中只是对网页内容进行了初步的解析和处理,具体的解析逻辑和数据提取需要根据实际情况进行调整。同时,还需要了解网页的具体结构和内容布局,以确保解析的准确性。

    希望以上解决方案对你有帮助!

    评论

报告相同问题?

问题事件

  • 已结题 (查看结题原因) 5月2日
  • 创建了问题 4月19日

悬赏问题

  • ¥15 Windows X86 远线程注入问题解惑
  • ¥15 Vs2022安装时黑框闪退无反应
  • ¥15 嵌入式设备网口down后再up时不能link?
  • ¥15 关于区块链和边缘计算
  • ¥15 做一个简单项目,用Python分析共享单车的数据
  • ¥20 在使用CESM2.2.0模型进行case.submit过程中出现如下错误(关键词-Map)
  • ¥15 有办法改变通过wifi进入的网站的设置吗
  • ¥15 label_studio
  • ¥15 请教如何用Python发邮件
  • ¥15 linux系统安装问题