转 lucene3搜索引擎,索引建立搜索排序分页高亮显示, IKAnalyzer分词

转 lucene3搜索引擎,索引建立搜索排序分页高亮显示, IKAnalyzer分词
Java代码
package com.zjr.service.impl;   
   
import java.io.File;   
import java.io.IOException;   
import java.io.StringReader;   
import java.lang.reflect.InvocationTargetException;   
import java.util.ArrayList;   
import java.util.List;   
   
import org.apache.commons.beanutils.BeanUtils;   
import org.apache.commons.logging.Log;   
import org.apache.commons.logging.LogFactory;   
import org.apache.lucene.analysis.Analyzer;   
import org.apache.lucene.analysis.TokenStream;   
import org.apache.lucene.document.Document;   
import org.apache.lucene.document.Field;   
import org.apache.lucene.document.Field.Index;   
import org.apache.lucene.document.Field.Store;   
import org.apache.lucene.index.CorruptIndexException;   
import org.apache.lucene.index.IndexReader;   
import org.apache.lucene.index.IndexWriter;   
import org.apache.lucene.index.Term;   
import org.apache.lucene.search.BooleanClause;   
import org.apache.lucene.search.IndexSearcher;   
import org.apache.lucene.search.Query;   
import org.apache.lucene.search.ScoreDoc;   
import org.apache.lucene.search.Sort;   
import org.apache.lucene.search.SortField;   
import org.apache.lucene.search.TopDocs;   
import org.apache.lucene.search.TopScoreDocCollector;   
import org.apache.lucene.search.highlight.Highlighter;   
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;   
import org.apache.lucene.search.highlight.QueryScorer;   
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;   
import org.apache.lucene.store.Directory;   
import org.apache.lucene.store.FSDirectory;   
import org.wltea.analyzer.lucene.IKAnalyzer;   
import org.wltea.analyzer.lucene.IKQueryParser;   
import org.wltea.analyzer.lucene.IKSimilarity;   
   
import com.zjr.model.User;   
   
public class UserIndexService {   
   
    // Commons-logging logger for this service.
    private final Log logger = LogFactory.getLog(UserIndexService.class);   
    // File-system path of the directory that holds the Lucene index.
    private final String dirPath = "d:/temp/user";   
   
    // Analyzer passed to the IndexWriter and also used to re-tokenize stored text for highlighting.
    Analyzer analyzer = new IKAnalyzer();   
    // Index directory handle; assigned by init() and deleIndex().
    Directory directory = null;   
    // Writer assigned by createIndex().
    IndexWriter writer = null;   
    // Searcher assigned by search()/getPageQuery()/getPageQuery2() and read by hitsToQuery().
    IndexSearcher indexSearcher = null;   
   
    private void confirmDirs() {   
        File indexFile = new File(dirPath);   
        if (!indexFile.exists()) {   
            indexFile.mkdirs();   
        }   
        if (!indexFile.exists() || !indexFile.canWrite()) {   
            if (logger.isDebugEnabled())   
                logger.error("索引文件目录创建失败或不可写入!");   
        }   
    }   
   
    public void init() {   
        confirmDirs();   
        try {   
            File f = new File(dirPath);   
            directory = FSDirectory.open(f);   
               
        } catch (Exception e) {   
            if (logger.isDebugEnabled()) {   
                logger.error("解除索引文件锁定失败!" + e.getCause());   
            }   
        }   
    }   
   
    public void createIndex(List<User> userList) {   
        init();   
        try {   
               
//           第一个参数是存放索引目录有FSDirectory(存储到磁盘上)和RAMDirectory(存储到内存中),   
//          第二个参数是使用的分词器, 第三个:true,建立全新的索引,false,建立增量索引,第四个是建立的索引的最大长度   
            writer = new IndexWriter(directory, analyzer, true,IndexWriter.MaxFieldLength.LIMITED);   
            writer.setMergeFactor(500);   
            writer.setMaxBufferedDocs(155);   
            writer.setMaxFieldLength(Integer.MAX_VALUE);   
            writeIndex(writer, userList);   
            writer.optimize();   
            writer.close();   
        } catch (IOException e) {   
            // TODO Auto-generated catch block   
            e.printStackTrace();   
        }   
    }   
   
    public List<User> search(String keyword) {   
   
        File indexFile = new File(dirPath);   
        if (!indexFile.exists()) {   
            return null;   
        }   
        Directory dir;   
        try {   
            dir = FSDirectory.open(indexFile);   
            indexSearcher = new IndexSearcher(dir);   
            indexSearcher.setSimilarity(new IKSimilarity());   
            // 单字段查询,单条件查询   
            // Query query = IKQueryParser.parse("userInfo", keyword);   
   
            // 多字段,单条件查询   
            String[] fields = new String[] { "userInfo", "parameter1" };   
            Query query = IKQueryParser.parseMultiField(fields, keyword);   
   
            // 多字体,单条件,多BooleanClause.Occur[] flags , 查询条件的组合方式(Or/And)   
            // BooleanClause.Occur[]数组,它表示多个条件之间的关系,   
            // BooleanClause.Occur.MUST表示 and,   
            // BooleanClause.Occur.MUST_NOT表示not,   
            // BooleanClause.Occur.SHOULD表示or.   
            // String[] fields =new String[]{"userInfo","parameter1"};   
            // BooleanClause.Occur[] flags=new   
            // BooleanClause.Occur[]{BooleanClause.Occur.MUST,BooleanClause.Occur.SHOULD};   
            // Query query = IKQueryParser.parseMultiField(fields,   
            // keyword,flags);   
   
            // //多Field,多条件查询分析   
            // String[] fields =new String[]{"userInfo","parameter1"};   
            // String[] queries = new String[]{keyword,keyword};   
            // Query query = IKQueryParser.parseMultiField(fields,queries);   
   
            // 多Field,多条件,多Occur 查询   
            // String[] fields =new String[]{"userInfo","parameter1"};   
            // String[] queries = new String[]{keyword,keyword};   
            // BooleanClause.Occur[] flags=new   
            // BooleanClause.Occur[]{BooleanClause.Occur.MUST,BooleanClause.Occur.SHOULD};   
            // Query query =   
            // IKQueryParser.parseMultiField(fields,queries,flags);   
   
            // 搜索相似度最高的20条记录   
            TopDocs topDocs = indexSearcher.search(query, 20);   
            ScoreDoc[] hits = topDocs.scoreDocs;   
            return hitsToQuery(hits, query);   
   
        } catch (IOException e) {   
            // TODO Auto-generated catch block   
            e.printStackTrace();   
        }   
   
        return null;   
    }   
   
    private List<User> hitsToQuery(ScoreDoc[] hits, Query query) {   
        List<User> list = new ArrayList<User>();   
        try {   
            for (int i = 0; i < hits.length; i++) {   
                User u = new User();   
                Document doc = indexSearcher.doc(hits[i].doc);   
                u.setUserId(Integer.parseInt(doc.get("userId")));   
                u.setUserName(doc.get("userName"));   
                u.setUserAge(Integer.parseInt(doc.get("userAge")));   
                // 高亮设置   
                SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter(   
                        "<font color=\"red\">", "</font>");   
                Highlighter highlighter = new Highlighter(simpleHtmlFormatter,   
                        new QueryScorer(query));   
                TokenStream tokenStream = analyzer.tokenStream("text",   
                        new StringReader(doc.get("userInfo")));   
                String userInfo = highlighter.getBestFragment(tokenStream, doc   
                        .get("userInfo"));   
                if (userInfo != null) {   
                    u.setUserInfo(userInfo);   
                } else {   
                    u.setUserInfo(doc.get("userInfo"));   
                }   
   
                SimpleHTMLFormatter simpleHtmlFormatter1 = new SimpleHTMLFormatter(   
                        "<font color=\"red\">", "</font>");   
                Highlighter highlighter1 = new Highlighter(   
                        simpleHtmlFormatter1, new QueryScorer(query));   
                TokenStream tokenStream1 = analyzer.tokenStream("text1",   
                        new StringReader(doc.get("parameter1")));   
                String p1 = highlighter1.getBestFragment(tokenStream1, doc   
                        .get("parameter1"));   
                if (p1 != null) {   
                    u.setParameter1(p1);   
                } else {   
                    u.setParameter1(doc.get("parameter1"));   
                }   
   
                u.setParameter2(doc.get("parameter2"));   
                u.setParameter3(doc.get("parameter3"));   
                u.setParameter4(doc.get("parameter4"));   
                list.add(u);   
            }   
   
            indexSearcher.close();   
            return list;   
        } catch (CorruptIndexException e) {   
            // TODO Auto-generated catch block   
            e.printStackTrace();   
        } catch (IOException e) {   
            // TODO Auto-generated catch block   
            e.printStackTrace();   
        } catch (InvalidTokenOffsetsException e) {   
            // TODO Auto-generated catch block   
            e.printStackTrace();   
        }   
        return null;   
    }   
   
    public void writeIndex(IndexWriter writer, List<User> userList) {   
   
        try {   
            for (User u : userList) {   
                Document doc = getDoc(u);   
                writer.addDocument(doc);   
            }   
        } catch (IOException e) {   
            // TODO Auto-generated catch block   
            e.printStackTrace();   
        }   
   
    }   
   
    private Document getDoc(User user) {   
        System.out.println("用户ID 为" + user.getUserId() + " 索引被创建");   
        Document doc = new Document();   
        addField2Doc(doc, user, "userId", Store.YES, Index.NOT_ANALYZED);   
        addField2Doc(doc, user, "userName", Store.YES, Index.NOT_ANALYZED);// Index.NOT_ANALYZED   
                                                                            // 不分词,但建立索引   
        addField2Doc(doc, user, "userAge", Store.YES, Index.NOT_ANALYZED);// Index.ANALYZED   
                                                                            // 分词并且建立索引   
        addField2Doc(doc, user, "userInfo", Store.YES, Index.ANALYZED);   
        addField2Doc(doc, user, "parameter1", Store.YES, Index.ANALYZED);   
        addField2Doc(doc, user, "parameter2", Store.YES, Index.ANALYZED);   
        addField2Doc(doc, user, "parameter3", Store.YES, Index.ANALYZED);   
        addField2Doc(doc, user, "parameter4", Store.YES, Index.ANALYZED);   
        return doc;   
    }   
   
    private void addField2Doc(Document doc, Object bean, String name, Store s,   
            Index i) {   
        String value;   
        try {   
            value = BeanUtils.getProperty(bean, name);   
            if (value != null) {   
                doc.add(new Field(name, value, s, i,   
                        Field.TermVector.WITH_POSITIONS_OFFSETS));   
            }   
        } catch (IllegalAccessException e) {   
            logger.error("get bean property error", e);   
        } catch (InvocationTargetException e) {   
            logger.error("get bean property error", e);   
        } catch (NoSuchMethodException e) {   
            logger.error("get bean property error", e);   
        }   
    }   
   
    /** 
     * 没有排序,有高亮,有分页 
     *  
     * @param pageNo 
     * @param pageSize 
     * @param keyword 
     * @return 
     */   
    public PageBean getPageQuery(int pageNo, int pageSize, String keyword) {   
        List result = new ArrayList();   
        File indexFile = new File(dirPath);   
        if (!indexFile.exists()) {   
            return null;   
        }   
        Directory dir;   
        try {   
            dir = FSDirectory.open(indexFile);   
            indexSearcher = new IndexSearcher(dir);   
            indexSearcher.setSimilarity(new IKSimilarity());   
   
            String[] fields = new String[] { "userInfo", "parameter1" };   
            BooleanClause.Occur[] flags = new BooleanClause.Occur[] {   
                    BooleanClause.Occur.MUST, BooleanClause.Occur.SHOULD };   
            Query query = IKQueryParser.parseMultiField(fields, keyword, flags);   
   
            TopScoreDocCollector topCollector = TopScoreDocCollector.create(   
                    indexSearcher.maxDoc(), true);   
            indexSearcher.search(query, topCollector);   
            // 查询当页的记录   
            ScoreDoc[] docs = topCollector.topDocs((pageNo - 1) * pageSize,   
                    pageSize).scoreDocs;   
   
            // String[] highlightCol = {"userInfo", "parameter1"};   
            // 高亮设置   
            SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter(   
                    "<font color=\"red\">", "</font>");   
            Highlighter highlighter = new Highlighter(simpleHtmlFormatter,   
                    new QueryScorer(query));   
   
            for (ScoreDoc scdoc : docs) {   
                User u = new User();   
                Document doc = indexSearcher.doc(scdoc.doc);   
                //                 
                // for (Fieldable fa : doc.getFields()) {   
                // System.out.println(fa.name());   
                // String value = doc.get(fa.name());   
                // for (String col : highlightCol) {   
                // if(fa.name().equals(col)) {   
                // //设置高显内容   
                // TokenStream tokenStream = analyzer.tokenStream("text",new   
                // StringReader(value));   
                // value = highlighter.getBestFragment(tokenStream, value);   
                // }   
                // }   
                //                     
                // }   
   
                u.setUserId(Integer.parseInt(doc.get("userId")));   
                u.setUserName(doc.get("userName"));   
                u.setUserAge(Integer.parseInt(doc.get("userAge")));   
   
                TokenStream tokenStream = analyzer.tokenStream("text",   
                        new StringReader(doc.get("userInfo")));   
                String userInfo = highlighter.getBestFragment(tokenStream, doc   
                        .get("userInfo"));   
                if (userInfo != null) {   
                    u.setUserInfo(userInfo);   
                } else {   
                    u.setUserInfo(doc.get("userInfo"));   
                }   
   
                TokenStream tokenStream1 = analyzer.tokenStream("text1",   
                        new StringReader(doc.get("parameter1")));   
                String p1 = highlighter.getBestFragment(tokenStream1, doc   
                        .get("parameter1"));   
                if (p1 != null) {   
                    u.setParameter1(p1);   
                } else {   
                    u.setParameter1(doc.get("parameter1"));   
                }   
   
                u.setParameter2(doc.get("parameter2"));   
                u.setParameter3(doc.get("parameter3"));   
                u.setParameter4(doc.get("parameter4"));   
                result.add(u);   
   
            }   
            PageBean pb = new PageBean();   
            pb.setCurrentPage(pageNo);// 当前页   
            pb.setPageSize(pageSize);   
            pb.setAllRow(topCollector.getTotalHits());// hit中的记录数目   
            pb.setList(result);   
            return pb;   
   
        } catch (IOException e) {   
            // TODO Auto-generated catch block   
            e.printStackTrace();   
        } catch (InvalidTokenOffsetsException e) {   
            // TODO Auto-generated catch block   
            e.printStackTrace();   
        }   
   
        return null;   
    }   
   
    /** 
     * 排序,有高亮,有分页 
     *  
     * @param pageNo 
     * @param pageSize 
     * @param keyword 
     * @return 
     */   
    public PageBean getPageQuery2(int pageNo, int pageSize, String keyword) {   
        List result = new ArrayList();   
        File indexFile = new File(dirPath);   
        if (!indexFile.exists()) {   
            return null;   
        }   
        Directory dir;   
        try {   
            dir = FSDirectory.open(indexFile);   
            indexSearcher = new IndexSearcher(dir);   
            indexSearcher.setSimilarity(new IKSimilarity());   
   
            String[] fields = new String[] { "userInfo", "parameter1" };   
            BooleanClause.Occur[] flags = new BooleanClause.Occur[] {   
                    BooleanClause.Occur.MUST, BooleanClause.Occur.SHOULD };   
            Query query = IKQueryParser.parseMultiField(fields, keyword, flags);   
   
            // 多字段排序,设置在前面的会优先排序   
            SortField[] sortFields = new SortField[2];   
            SortField sortField = new SortField("userId", SortField.INT, false);//false升序,true降序   
            SortField FIELD_SEX = new SortField("userAge", SortField.INT, true);   
            sortFields[0] = sortField;   
            sortFields[1] = FIELD_SEX;   
            Sort sort = new Sort(sortFields);   
   
            TopDocs topDocs = indexSearcher.search(query, null, 50, sort);   
   
            if (topDocs.totalHits != 0) {   
                // for(ScoreDoc sd : topDocs.scoreDocs) {   
                //                     
                // }   
                // 高亮设置   
                SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<font color=\"red\">", "</font>");   
                Highlighter highlighter = new Highlighter(simpleHtmlFormatter,new QueryScorer(query));   
   
                for (int i = (pageNo - 1) * pageSize; i < pageSize * pageNo; i++) {   
                    ScoreDoc scdoc = topDocs.scoreDocs[i];   
                    User u = new User();   
                    Document doc = indexSearcher.doc(scdoc.doc);   
                    u.setUserId(Integer.parseInt(doc.get("userId")));   
                    u.setUserName(doc.get("userName"));   
                    u.setUserAge(Integer.parseInt(doc.get("userAge")));   
                    TokenStream tokenStream = analyzer.tokenStream("text",new StringReader(doc.get("userInfo")));   
                    String userInfo = highlighter.getBestFragment(tokenStream,doc.get("userInfo"));   
                    if (userInfo != null) {   
                        u.setUserInfo(userInfo);   
                    } else {   
                        u.setUserInfo(doc.get("userInfo"));   
                    }   
   
                    TokenStream tokenStream1 = analyzer.tokenStream("text1",new StringReader(doc.get("parameter1")));   
                    String p1 = highlighter.getBestFragment(tokenStream1, doc.get("parameter1"));   
                    if (p1 != null) {   
                        u.setParameter1(p1);   
                    } else {   
                        u.setParameter1(doc.get("parameter1"));   
                    }   
   
                    u.setParameter2(doc.get("parameter2"));   
                    u.setParameter3(doc.get("parameter3"));   
                    u.setParameter4(doc.get("parameter4"));   
                    result.add(u);   
   
                }   
                PageBean pb = new PageBean();   
                pb.setCurrentPage(pageNo);// 当前页   
                pb.setPageSize(pageSize);   
                pb.setAllRow(topDocs.totalHits);// hit中的记录数目   
                pb.setList(result);   
                return pb;   
   
            }   
        } catch (IOException e) {   
            // TODO Auto-generated catch block   
            e.printStackTrace();   
        } catch (InvalidTokenOffsetsException e) {   
            // TODO Auto-generated catch block   
            e.printStackTrace();   
        }   
   
        return null;   
    }   
       
    /** 
     * 删除索引 
     * @param userId 
     */   
    public void deleIndex(String userId){   
           
        try {   
            File f = new File(dirPath);   
            directory = FSDirectory.open(f);   
            IndexReader reader = IndexReader.open(directory,false);    
            Term term = new Term("userId", userId);    
            reader.deleteDocuments(term);   
            reader.close();    
        } catch (IOException e) {   
            // TODO Auto-generated catch block   
            e.printStackTrace();   
        }   
           
           
    }   
   


高亮设置集成抽取成一个方法
Java代码 
public String toHighlighter(Query query,Document doc,String field){ 
        try { 
            SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<font color=\"red\">", "</font>"); 
            Highlighter highlighter = new Highlighter(simpleHtmlFormatter,new QueryScorer(query)); 
            TokenStream tokenStream1 = analyzer.tokenStream("text",new StringReader(doc.get(field))); 
            String highlighterStr = highlighter.getBestFragment(tokenStream1, doc.get(field)); 
            
            return highlighterStr == null ? doc.get(field):highlighterStr; 
        } catch (IOException e) { 
            // TODO Auto-generated catch block 
            e.printStackTrace(); 
        } catch (InvalidTokenOffsetsException e) { 
            // TODO Auto-generated catch block 
            e.printStackTrace(); 
        } 
        return null;