全文检索服务SOLR3.3支持IK汉语分词

全文检索服务SOLR3.3支持IK中文分词

转自:http://sinykk.iteye.com/blog/1171098

 

下载如下三个软件【请注意版本】

1:Solr的下载地址
http://labs.renren.com/apache-mirror//lucene/solr/3.3.0/apache-solr-3.3.0.zip 【使用 D:\solr\apache-solr-3.3.0\example\solr文件夹里的内容】
2:Tomcat的下载地址
http://apache.etoak.com/tomcat/tomcat-6/v6.0.33/bin/apache-tomcat-6.0.33.tar.gz 【使用其运行JAVA WEB】
3:IKAnalyzer下载地址
http://ik-analyzer.googlecode.com/files/IKAnalyzer3.2.8%20bin.zip 【只使用其IKAnalyzer3.2.5Stable.jar】

提前是安装好JDK1.6+,并将每个软件解压到 /sinykk 里


1、将TOMCAT解压到 /usr/local/apache-tomcat-6.0.33/

2、将 /solr/apache-solr-3.3.0/example/solr 文件拷贝到 /usr/local/apache-tomcat-6.0.33/

3、然后修改TOMCAT的/usr/local/apache-tomcat-6.0.33/conf/server.xml【增加中文支持】

 <Connector port="8983" protocol="HTTP/1.1" 
               connectionTimeout="20000" 
               redirectPort="8443" URIEncoding="UTF-8"/>

 

 4、添加文件 /usr/local/apache-tomcat-6.0.33/conf/Catalina/localhost/solr.xml 内容如下

 

 

<?xml version="1.0" encoding="UTF-8"?>
<Context docBase="/usr/local/apache-tomcat-6.0.33/webapps/solr" debug="0" crossContext="true" >
   <Environment name="solr/home" type="java.lang.String" value="/usr/local/apache-tomcat-6.0.33/solr" override="true" />
</Context>
 

5、将/sinykk/solr/apache-solr-3.3.0/example/webapps/solr.war文件放到/usr/local/apache-tomcat-6.0.33/webapps文件夹下,并启动TOMCAT

6、将/sinykk/solr/IKAnalyzer3.2.8.jar 文件放到/usr/local/apache-tomcat-6.0.33/webapps/solr/WEB-INF/lib 目录下


7、修改/usr/local/apache-tomcat-6.0.33/solr/conf/schema.xml文件为

 

<?xml version="1.0" encoding="UTF-8" ?>
<schema name="example" version="1.4">
 <types>
    <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
	 <!-- 
	<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
      <analyzer type="index">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>
	-->

	 <fieldType name="textik" class="solr.TextField" >
               <analyzer class="org.wltea.analyzer.lucene.IKAnalyzer"/>  
       
               <analyzer type="index">  
                   <tokenizer class="org.wltea.analyzer.solr.IKTokenizerFactory" isMaxWordLength="false"/>  
                   <filter class="solr.StopFilterFactory"  
                           ignoreCase="true" words="stopwords.txt"/>  
                   <filter class="solr.WordDelimiterFilterFactory"  
                           generateWordParts="1"  
                           generateNumberParts="1"  
                           catenateWords="1"  
                           catenateNumbers="1"  
                           catenateAll="0"  
                           splitOnCaseChange="1"/>  
                   <filter class="solr.LowerCaseFilterFactory"/>  
                   <filter class="solr.EnglishPorterFilterFactory"  
                       protected="protwords.txt"/>  
                   <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>  
               </analyzer>  
     			<analyzer type="query">  
                   <tokenizer class="org.wltea.analyzer.solr.IKTokenizerFactory" isMaxWordLength="false"/>  
                   <filter class="solr.StopFilterFactory"  
                           ignoreCase="true" words="stopwords.txt"/>  
                   <filter class="solr.WordDelimiterFilterFactory"  
                           generateWordParts="1"  
                           generateNumberParts="1"  
                           catenateWords="1"  
                           catenateNumbers="1"  
                           catenateAll="0"  
                           splitOnCaseChange="1"/>  
                   <filter class="solr.LowerCaseFilterFactory"/>  
                   <filter class="solr.EnglishPorterFilterFactory"  
                       protected="protwords.txt"/>  
                   <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>  
               </analyzer>  
       
</fieldType>
 </types>


 <fields>
  <field name="id" type="string" indexed="true" stored="true" required="true" /> 
 </fields>

 <uniqueKey>id</uniqueKey>

</schema>

 

 

最后运行http://192.168.171.129:8983/solr/admin/analysis.jsp

效果图如下

 


全文检索服务SOLR3.3支持IK汉语分词