dph87312
dph87312
采纳率100%
2015-10-22 04:54

在一个核心Apache SOLR中索引多个实体

已采纳

I am having difficulty indexing multiple entities in one collection. When I run the import, only the entity defined last gets indexed.

The configuration is shown below:

data-config.xml

<?xml version="1.0" encoding="UTF-8"?>
<!--
  DataImportHandler configuration for the MySQL "demo" database.
  Two root entities are imported into one core: table1, and table2 together
  with its child rows from table3.
-->
<dataConfig>
  <propertyWriter dateFormat="yyyy-MM-dd HH:mm:ss"
                  type="SimplePropertiesWriter"
                  filename="demo.properties" />

  <!--
    GenerateId writes the next value of a running counter into the doc_id
    column of every row; doc_id is mapped to the "singlekey" field below.
    NOTE(review): if the script is evaluated once per entity, the counter
    restarts at 1 for each entity and table2 documents would overwrite
    table1 documents that received the same key. Confirm, and consider a
    key derived from the entity name plus its primary key instead.
  -->
  <script><![CDATA[
id = 1;
function GenerateId(row) {
  row.put('doc_id', (id ++).toFixed());
  return row;
}
]]>
  </script>

  <dataSource type="JdbcDataSource"
              zeroDateTimeBehavior="convertToNull"
              name="ds-1"
              driver="com.mysql.jdbc.Driver"
              url="jdbc:mysql://127.0.0.1/demo"
              batchSize="-1"
              user="root"
              autoCommit="true"
              password="xxxxxxx" />

  <!--
    NOTE(review): the Solr field names used below carry a "solr_" prefix
    (for example solr_table1_desc_en). Confirm that schema.xml declares
    fields, or a dynamicField, with exactly these names.
  -->
  <document name="demo_doc">

    <!--
      Root entity for table1.
      The WHERE clause keeps only flagged rows and then selects either all of
      them (clean full import) or those updated since the last run. The
      parentheses are required: AND binds tighter than OR, so without them
      the flag filter would not apply to the "updated since" branch.
      The formatted date is aliased so that the column name matches the
      <field column="table1_date_updated"> mapping.
    -->
    <entity name="table1"
            pk="table1_id"
            dataSource="ds-1"
            transformer="HTMLStripTransformer,RegexTransformer,TemplateTransformer,DateFormatTransformer,script:GenerateId,LogTransformer"
            logTemplate="The demo is ${table1.table1_id}"
            logLevel="info"
            query="select table1_id,table1_desc,table1_flag,DATE_FORMAT(table1_date_updated,'%Y-%m-%dT%TZ') as table1_date_updated from table1 Where table1_flag=1 AND ('${dih.request.clean}' != 'false' OR table1_date_updated &gt; '${dih.table1.last_index_time}')">
      <field column="doc_id" name="singlekey" />
      <field column="doc_type" template="TABLE1" name="doc_type" />
      <field column="table1_desc" name="solr_table1_desc_en" stripHTML="true" />
      <field column="table1_date_updated" name="solr_table1_date_updated_dt" dateTimeFormat="yyyy-MM-dd'T'HH:mm:ss" locale="en" />
    </entity>

    <!-- Root entity for table2; same full/delta selection pattern as table1. -->
    <entity name="table2"
            pk="table2_id"
            dataSource="ds-1"
            transformer="HTMLStripTransformer,RegexTransformer,TemplateTransformer,DateFormatTransformer,script:GenerateId,LogTransformer"
            logTemplate="The table2 is ${table2.table2_id}"
            logLevel="info"
            query="select table2_id,table2_name,table2_flag,DATE_FORMAT(table2_date_updated,'%Y-%m-%dT%TZ') as table2_date_updated from table2 Where table2_flag=1 AND ('${dih.request.clean}' != 'false' OR table2_date_updated &gt; '${dih.table2.last_index_time}')">
      <field column="doc_id" name="singlekey" />
      <field column="doc_type" template="TABLE2" name="doc_type" />
      <field column="table2_name" name="solr_table2_name" />

      <!--
        Child rows of the current table2 row. A sub-entity can only resolve
        variables of its enclosing entity, so the join uses
        ${table2.table2_id}.
      -->
      <entity name="table3"
              pk="table3_id,table3_frid"
              transformer="HTMLStripTransformer,RegexTransformer,DateFormatTransformer,script:GenerateId,LogTransformer"
              logTemplate="The table3 is ${table3.table3_id}"
              logLevel="info"
              query="select table3_id,table3_frid,table3_name,table3_desc,table3_subdesc,table3_keyword,table3_flag,DATE_FORMAT(table3_date_updated,'%Y-%m-%dT%TZ') as table3_date_updated from table3 Where table3_frid=${table2.table2_id} AND table3_flag=1">
        <field column="table3_name" name="solr_table3_name" />
        <field column="table3_desc" name="solr_table3_desc" stripHTML="true" />
        <field column="table3_subdesc" name="solr_table3_subdesc" stripHTML="true" />
        <field column="table3_keyword" name="solr_table3_keyword" />
        <field column="table3_date_updated" name="solr_table3_date_updated_dt" dateTimeFormat="yyyy-MM-dd'T'HH:mm:ss" locale="en" />
      </entity>
    </entity>

  </document>
</dataConfig>

schema.xml

<!-- Key and discriminator fields shared by every document. -->
<field name="singlekey" type="string" multiValued="false" required="true" />
<field name="doc_type" type="string" multiValued="false" />

<!-- Documents are identified by the value of singlekey. -->
<uniqueKey>singlekey</uniqueKey>

<!-- Fields for table1. -->
<field name="table1_desc_en" type="text_auto" indexed="true" stored="true" multiValued="false" />
<field name="table1_date_updated_dt" type="date" indexed="true" stored="true" multiValued="false" />

<!-- Fields for table2 and its table3 children; most table3 fields are multi-valued. -->
<field name="table2_name" type="text_ws" indexed="true" stored="true" />
<field name="table3_name" type="text_ws" indexed="true" stored="true" multiValued="true" />
<field name="table3_desc" type="text_en_splitting" indexed="true" stored="true" multiValued="true" />
<field name="table3_subdesc" type="text_en" indexed="true" stored="true" multiValued="true" />
<field name="table3_keyword" type="text_en" indexed="true" stored="true" multiValued="true" />
<field name="table3_date_updated_dt" type="date" indexed="true" stored="true" multiValued="false" />

I am not able to index table 1. Table 2 and table 3 (which have a one-to-many relationship) are indexed, but table 1 is not.

  • 点赞
  • 写回答
  • 关注问题
  • 收藏
  • 复制链接分享
  • 邀请回答

1条回答

  • dongtun1209 dongtun1209 6年前

    The issue was fixed by adding the lines below to the table1 entity:

    <!--
      Root entity for table1 with explicit delta-import support.
      query: full import; flagged rows only. The parentheses keep the flag
        filter in force for both the clean and the "updated since" branch.
      deltaQuery: primary keys (plus data) of rows changed since the last run.
      deltaImportQuery: re-reads one changed row; the delta variable is keyed
        by the pk column name returned by deltaQuery, i.e. table1_id.
      preImportDeleteQuery: a Solr query (not SQL) used on a clean full import
        in place of deleting everything, so only table1 documents are removed.
      The formatted date is aliased everywhere so the column name matches the
      <field column="table1_date_updated"> mapping.
    -->
    <entity name="table1"
            pk="table1_id"
            dataSource="ds-1"
            transformer="HTMLStripTransformer,RegexTransformer,TemplateTransformer,DateFormatTransformer,script:GenerateId,LogTransformer"
            logTemplate="The demo is ${table1.table1_id}"
            logLevel="info"
            query="select table1_id,table1_desc,table1_flag,DATE_FORMAT(table1_date_updated,'%Y-%m-%dT%TZ') as table1_date_updated from table1 Where table1_flag=1 AND ('${dih.request.clean}' != 'false' OR table1_date_updated &gt; '${dih.table1.last_index_time}')"
            deltaImportQuery="select table1_id,table1_desc,table1_flag,DATE_FORMAT(table1_date_updated,'%Y-%m-%dT%TZ') as table1_date_updated from table1 Where table1_id='${dih.delta.table1_id}'"
            deltaQuery="select table1_id,table1_desc,table1_flag,DATE_FORMAT(table1_date_updated,'%Y-%m-%dT%TZ') as table1_date_updated from table1 Where table1_date_updated &gt; '${dih.table1.last_index_time}'"
            preImportDeleteQuery="doc_type:TABLE1">
      <field column="doc_id" name="singlekey" />
      <field column="doc_type" template="TABLE1" name="doc_type" />
      <field column="table1_desc" name="solr_table1_desc_en" stripHTML="true" />
      <field column="table1_date_updated" name="solr_table1_date_updated_dt" dateTimeFormat="yyyy-MM-dd'T'HH:mm:ss" locale="en" />
    </entity>
    
    点赞 评论 复制链接分享