doushai4890 2019-02-04 15:56
浏览 275

如何修复“仅在文档开头允许的XML声明”

I am trying to generate a sitemap, but somehow an extra DIV tag appears on the first line of the XML output. I need to remove this stray DIV tag from the XML output.

I first tried gathering all the logic at the top and moving the XML generation to the bottom, and I set the header to 'text/xml'. I also tried running strip_tags on the whole XML string before output, but then it reports that the document is empty.

/**
 * Strip image tags and oEmbed wrapper blocks from an HTML fragment.
 *
 * Two case-insensitive, dot-matches-newline patterns are applied in order.
 * The result of each replacement is kept unless preg_replace() itself fails.
 *
 * @param string $content HTML fragment to clean.
 * @return string The fragment with every match of both patterns removed.
 */
private function removeImageAndEmbeds ( $content )
{
    $patterns = [
        // "<img", then at least six "/" characters, then a closing "/>"
        '(<img).*?\\/.*?\\/.*?\\/.*?\\/.*?\\/.*?\\/.*?(\\/>)',
        // a <div> carrying data-oembed-url="..." up to the second following </div>
        '(<div).*?(data-oembed-url=)(".*?").*?<\\/div>.*?(<\\/div>)',
    ];

    foreach ( $patterns as $pattern ) {
        $stripped = preg_replace("/".$pattern."/is", "", $content);

        // preg_replace() signals failure with null. Testing truthiness instead
        // would throw away a legitimate empty-string result (content that
        // consisted only of removable markup) and hand back the original tags.
        if ( $stripped !== null ) {
            $content = $stripped;
        }
    }

    return $content;
}

/**
 * Build an XML sitemap (with a PageMap block per URL) for a tenant and hand
 * it to the output class as text/xml.
 *
 * The tenant is looked up by name: the URL segment is raw-URL-decoded and its
 * dashes are turned into spaces before matching. For every matching tenant
 * the rows of $this->DIVIEW for that account are read, de-duplicated by
 * record_id per record type, stripped of images/embeds and written as
 * <url> entries. When several tenants match, set_output() is called once per
 * tenant, so the document built for the last match is the one that is sent.
 *
 * Nothing is output when $tenantName is empty.
 *
 * NOTE(review): text is written as-is into the UTF-8 document. The previous
 * utf8_decode() call converted it to ISO-8859-1 first, which corrupts any
 * non-ASCII character in a document declared as UTF-8. Presumably the
 * resulting PHP warning is what the framework rendered as a stray <div>
 * ahead of the XML declaration - confirm against the error log.
 *
 * @param string $tenantName Tenant name as it appears in the URL.
 * @return void
 */
public function sitemaps ($tenantName="") {

    if ( empty($tenantName) ) {
        return;
    }

    $this->db->like( 't.name', str_replace('-', ' ', rawurldecode($tenantName)), 'none' );
    $results = $this->db->get($this->TBL . ' t')->result_array();

    foreach ( $results as $result ) {

        // Array-form condition so the query builder escapes the value
        // instead of it being concatenated into the WHERE string.
        $AllItems = $this->db->get_where($this->DIVIEW . ' di', ['di.account_id' => $result['id']])->result_array();

        // NOTE(review): URLs are built from the raw $tenantName, exactly as
        // before; confirm whether formatTenantNameinURL($result['name']) was
        // meant to be passed instead.
        $entries = [];
        $seenItemIds = [];
        $seenDdIds = [];
        foreach ( $AllItems as $item ) {

            // The id is expected to look like "<key>_<type>"; rows without
            // a type part are skipped instead of raising an undefined-offset
            // notice.
            $pieces = explode('_', $item['id']);
            if ( !isset($pieces[1]) ) {
                continue;
            }

            $recordId = $item['record_id'];
            if ( $pieces[1] === $this->ITEMTBL ) {
                if ( isset($seenItemIds[$recordId]) ) {
                    continue;
                }
                $seenItemIds[$recordId] = true;
                $action = 'show';
            } else if ( $pieces[1] === 'dataDefinitions' ) {
                if ( isset($seenDdIds[$recordId]) ) {
                    continue;
                }
                $seenDdIds[$recordId] = true;
                $action = 'data_definition';
            } else {
                continue;
            }

            $item['content'] = $this->removeImageAndEmbeds( $item['content'] );
            $entries[] = [
                'url'  => formatFrontEndURL( $this->current_class_name, $tenantName, $action, $pieces[0] ),
                'item' => $item,
            ];
        }

        $urlset = new SimpleXMLElement('<?xml version="1.0" encoding="UTF-8"?><urlset />');
        $urlset->addAttribute('xmlns', 'http://www.sitemaps.org/schemas/sitemap/0.9');
        foreach ( $entries as $entry ) {

            $item = $entry['item'];

            $url = $urlset->addChild('url');
            $url->loc = $entry['url'];

            $pageMap = $url->addChild('PageMap');
            $pageMap->addAttribute('xmlns', 'http://www.google.com/schemas/sitemap-pagemap/1.0');

            // Flatten the HTML to a single line of plain text: drop tags,
            // then turn line breaks, tabs and their entity forms into spaces.
            $content = trim( str_replace(["&nbsp;", "\r", "\n", "\t", "&#13;", "&#10;"], ' ', strip_tags( $item['content'] )) );

            $dataObject = $pageMap->addChild('DataObject');
            $dataObject->addAttribute('type', 'document');
            $dataObject->addAttribute('id', $item['record_id']);
            // Property assignment (rather than addChild with a value) lets
            // SimpleXML escape "&" and "<" in the title and content.
            $dataObject->Attribute[0]['name'] = 'title';
            $dataObject->Attribute[0] = $item['title'];
            $dataObject->Attribute[1]['name'] = 'content';
            $dataObject->Attribute[1] = $content;
        }

        $xmlContent = $urlset->asXML();
        $this->output->set_content_type('text/xml')->set_output( $xmlContent );
    }
}

Here are the two errors reported by the SEOChat validator: https://drive.google.com/file/d/1vacmuJL6hnMErzqZ5zZWkkObT74rKOmT/view?usp=sharing https://drive.google.com/file/d/1y3z85D1WtJIT9GvOC-DeYwS-DtQCAxK5/view?usp=sharing

Here is the error shown in Google Search Console: https://drive.google.com/file/d/1qMvifyjGILqAjJzdWdc90jyymvdUFV5A/view?usp=sharing

  • 写回答

0条回答 默认 最新

    报告相同问题?

    悬赏问题

    • ¥60 求一个简单的网页(标签-安全|关键词-上传)
    • ¥35 lstm时间序列共享单车预测,loss值优化,参数优化算法
    • ¥15 基于卷积神经网络的声纹识别
    • ¥15 Python中的request,如何使用ssr节点,通过代理requests网页。本人在泰国,需要用大陆ip才能玩网页游戏,合法合规。
    • ¥100 为什么这个恒流源电路不能恒流?
    • ¥15 有偿求跨组件数据流路径图
    • ¥15 写一个方法checkPerson,入参实体类Person,出参布尔值
    • ¥15 我想咨询一下路面纹理三维点云数据处理的一些问题,上传的坐标文件里是怎么对无序点进行编号的,以及xy坐标在处理的时候是进行整体模型分片处理的吗
    • ¥15 CSAPPattacklab
    • ¥15 一直显示正在等待HID—ISP