doushai4890 2019-02-04 15:56
浏览 275

如何修复“仅在文档开头允许的XML声明”

I am trying to generate a sitemap, but somehow an extra DIV tag appears on the first line of the XML output, before the XML declaration. I need to remove this stray DIV tag from the output.

I first gather all the data and keep the XML generation separate at the bottom of the method, and I set the content type header to 'text/xml'. I also tried running strip_tags() over the whole XML string before output, but then the document is reported as empty.

/**
 * Strip matching <img> tags and oEmbed wrapper <div> blocks from an HTML fragment.
 *
 * Each pattern is applied in turn; if a replacement fails (preg_replace()
 * returns null) the content is left as it was before that pattern.
 *
 * @param string $content HTML fragment to clean.
 * @return string The fragment with every match of both patterns removed.
 */
private function removeImageAndEmbeds ( $content )
{
    // Self-closing <img ... /> tags. As written, the pattern needs at least
    // six "/" characters between "<img" and the closing "/>".
    $re1='(<img).*?\\/.*?\\/.*?\\/.*?\\/.*?\\/.*?\\/.*?(\\/>)';

    // A <div> carrying data-oembed-url="...", up to and including the second
    // following </div>.
    $re2='(<div).*?(data-oembed-url=)(".*?").*?<\\/div>.*?(<\\/div>)';

    foreach ( [$re1, $re2] as $re ) {
        $stripped = preg_replace("/".$re."/is", "", $content);

        // Compare against null (the failure value) rather than testing
        // truthiness: a successful replacement that leaves "" or "0" is a
        // valid result and must not be thrown away.
        if ( $stripped !== null ) {
            $content = $stripped;
        }
    }

    return $content;
}

/**
 * Build an XML sitemap (with PageMap DataObjects) for the tenant rows whose
 * name matches $tenantName and pass it to the output class as text/xml.
 *
 * For every matching tenant the items of the DIVIEW view are loaded, reduced
 * to one entry per record id and per item type, stripped of images/embeds and
 * serialised as <url><loc/><PageMap><DataObject/></PageMap></url>.
 *
 * NOTE(review): set_output() is called once per matching tenant row, so when
 * several rows match only the last tenant's XML is kept as the response.
 * NOTE(review): nothing in this method emits HTML; markup appearing ahead of
 * the XML declaration presumably comes from output produced elsewhere
 * (views, hooks, stray echo/whitespace) — verify outside this method.
 *
 * @param string $tenantName Tenant name as it appears in the URL; it is
 *                           URL-decoded and dashes are turned into spaces
 *                           before matching. Nothing happens when empty.
 * @return void
 */
public function sitemaps ($tenantName="") {

    if ( empty($tenantName) ) {
        return;
    }

    $this->db->like( 't.name', str_replace('-', ' ', rawurldecode($tenantName)), 'none' );
    $results = $this->db->get($this->TBL . ' t')->result_array();

    foreach ( $results as $result ) {

        $tenantId = $result['id'];
        // NOTE(review): computed but never used — the URLs below are built
        // from the raw $tenantName. Confirm which of the two is intended.
        $tenantNameinURL = formatTenantNameinURL( $result['name'] );

        // Array form lets the query builder escape the value instead of
        // concatenating it into the WHERE string.
        $AllItems = $this->db->get_where($this->DIVIEW . ' di', ['di.account_id' => $tenantId])->result_array();

        // One sitemap entry per (item type, record id); the keyed lookup
        // replaces the repeated non-strict in_array() scans.
        $seen = ['show' => [], 'data_definition' => []];
        $entries = [];
        foreach ( $AllItems as $item ) {
            $pieces = explode('_', $item['id']);
            // Ids without an underscore carry no type part; skip them
            // instead of reading an undefined index.
            $type = isset($pieces[1]) ? $pieces[1] : null;

            if ( $type === $this->ITEMTBL ) {
                $action = 'show';
            } elseif ( $type === 'dataDefinitions' ) {
                $action = 'data_definition';
            } else {
                continue;
            }

            $recordId = $item['record_id'];
            if ( isset($seen[$action][$recordId]) ) {
                continue;
            }
            $seen[$action][$recordId] = true;

            $item['content'] = $this->removeImageAndEmbeds( $item['content'] );
            $entries[] = [
                'url'  => formatFrontEndURL( $this->current_class_name, $tenantName, $action, $pieces[0] ),
                'item' => $item,
            ];
        }

        $urlset = new SimpleXMLElement('<?xml version="1.0" encoding="UTF-8"?><urlset />');
        $urlset->addAttribute('xmlns', 'http://www.sitemaps.org/schemas/sitemap/0.9');
        foreach ( $entries as $entry ) {

            $item = $entry['item'];

            $url = $urlset->addChild('url');
            $url->loc = $entry['url'];

            $pageMap = $url->addChild('PageMap');
            $pageMap->addAttribute('xmlns', 'http://www.google.com/schemas/sitemap-pagemap/1.0');

            // The document is declared UTF-8, so the text is no longer run
            // through utf8_decode(): converting it to ISO-8859-1 would hand
            // invalid byte sequences to the XML writer for any non-ASCII
            // character (assumes the stored content is UTF-8 — TODO confirm).
            // The search list previously contained an empty string, which is
            // a no-op and presumably a mangled "\r".
            $content = trim( str_replace(
                ["&nbsp;", "\r", "\n", "\t", "&#13;", "&#10;"],
                ' ',
                strip_tags( (string) $item['content'] )
            ) );

            $dataObject = $pageMap->addChild('DataObject');
            $dataObject->addAttribute('type', 'document');
            $dataObject->addAttribute('id', $item['record_id']);
            // Assigning through the property (rather than addChild's value
            // argument) lets SimpleXML escape special characters in the text.
            $dataObject->Attribute[0]['name'] = 'title';
            $dataObject->Attribute[0] = $item['title'];
            $dataObject->Attribute[1]['name'] = 'content';
            $dataObject->Attribute[1] = $content;
        }

        $xmlContent = $urlset->asXML();
        $this->output->set_content_type('text/xml')->set_output( $xmlContent );
    }
}

Here are the two errors reported by the SEO Chat validator: https://drive.google.com/file/d/1vacmuJL6hnMErzqZ5zZWkkObT74rKOmT/view?usp=sharing https://drive.google.com/file/d/1y3z85D1WtJIT9GvOC-DeYwS-DtQCAxK5/view?usp=sharing

Here is the error shown in Google Search Console: https://drive.google.com/file/d/1qMvifyjGILqAjJzdWdc90jyymvdUFV5A/view?usp=sharing

  • 写回答

0条回答

    报告相同问题?

    悬赏问题

    • ¥15 #MATLAB仿真#车辆换道路径规划
    • ¥15 java 操作 elasticsearch 8.1 实现 索引的重建
    • ¥15 数据可视化Python
    • ¥15 要给毕业设计添加扫码登录的功能!!有偿
    • ¥15 kafka 分区副本增加会导致消息丢失或者不可用吗?
    • ¥15 微信公众号自制会员卡没有收款渠道啊
    • ¥100 Jenkins自动化部署—悬赏100元
    • ¥15 关于#python#的问题:求帮写python代码
    • ¥20 MATLAB画图图形出现上下震荡的线条
    • ¥15 关于#windows#的问题:怎么用WIN 11系统的电脑 克隆WIN NT3.51-4.0系统的硬盘