Lucene 3.0.1 全文检索引擎的架构 对文件,数据库建索引,及查询(高亮显示)
Lucene 3.0.1 全文检索引擎的架构 对文件,数据库建索引,及查询(高亮显示)
lucene是Apache软件基金会Jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包,即它不是一个完整的全文检索引擎,而是一个全文检索引擎的架构,提供了完整的查询引擎和索引引擎,部分文本分析引擎(英文与德文两种西方语言)。Lucene的目的是为软件开发人员提供一个简单易用的工具包,以方便的在目标系统中实现全文检索的功能,或者是以此为基础建立起完整的全文检索引擎。
查询 关键词 “唐山” 之后效果图:
对文件创建索引及查询
创建索引 Lucene 3.0(第一步)
搜索索引 Lucene 3.0(第二步)
对数据库创建索引及查询
建立数据库索引 lucene3.6
搜索索引 Lucene 3.0(第二步)
对文件索引的查询,和对数据库索引的查询是一样的
下面看看怎么删除指定索引
Lucene 3.0+ 删除索引
在此做个分隔记号,便于后文查找。
lucene是Apache软件基金会Jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包,即它不是一个完整的全文检索引擎,而是一个全文检索引擎的架构,提供了完整的查询引擎和索引引擎,部分文本分析引擎(英文与德文两种西方语言)。Lucene的目的是为软件开发人员提供一个简单易用的工具包,以方便的在目标系统中实现全文检索的功能,或者是以此为基础建立起完整的全文检索引擎。
查询 关键词 “唐山” 之后效果图:
对文件创建索引及查询
创建索引 Lucene 3.0(第一步)
package com.gjw.lecence;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.util.Date;

import jxl.Sheet;
import jxl.Workbook;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
import org.textmining.text.extraction.WordExtractor;

/**
 * Step 1: build a Lucene 3.0 index over the .txt/.doc/.xls files found
 * recursively under {@link #DATA_DIR}, writing the index to {@link #INDEX_DIR}.
 *
 * Each document gets two fields: "contents" (the extracted text) and
 * "filename" (the canonical path), both stored and analyzed.
 *
 * @author RenWeigang
 * @version 2010.12.13
 */
public class Indexer {

    // Directory where the index files are written.
    private static String INDEX_DIR = "E:\\index";

    // Root directory whose files are indexed.
    private static String DATA_DIR = "E:\\rr";

    public static void main(String[] args) throws Exception {
        long start = new Date().getTime();
        int numIndexed = index(new File(INDEX_DIR), new File(DATA_DIR));
        long end = new Date().getTime();
        System.out.println("Indexing " + numIndexed + " files took "
                + (end - start) + " milliseconds");
    }

    /**
     * Indexes every supported file under dataDir, storing the index in indexDir.
     *
     * @param indexDir directory the index is written to
     * @param dataDir  root of the files to index; must be an existing directory
     * @return number of documents in the index after the run
     * @throws IOException if dataDir does not exist or is not a directory
     */
    public static int index(File indexDir, File dataDir) throws IOException {
        if (!dataDir.exists() || !dataDir.isDirectory()) {
            throw new IOException(dataDir + " does not exist or is not a directory");
        }
        /*
         * IndexWriter(directory, analyzer, create, maxFieldLength):
         * create=true rebuilds the index from scratch (an existing index is
         * deleted first); create=false would append to an existing index.
         * MaxFieldLength.LIMITED caps the number of terms indexed per field.
         */
        Directory dir = new SimpleFSDirectory(indexDir);
        IndexWriter writer = new IndexWriter(dir,
                new StandardAnalyzer(Version.LUCENE_30), true,
                IndexWriter.MaxFieldLength.LIMITED);
        try {
            indexDirectory(writer, dataDir);
            int numIndexed = writer.numDocs();
            writer.optimize(); // merge segments for faster searching
            writer.commit();
            return numIndexed;
        } finally {
            writer.close(); // FIX: always release the index write lock
        }
    }

    /**
     * Recursively walks dir, indexing .txt, .doc and .xls files.
     *
     * @param writer open IndexWriter documents are added to
     * @param dir    directory to walk
     * @throws IOException on text-file read errors
     */
    private static void indexDirectory(IndexWriter writer, File dir) throws IOException {
        File[] files = dir.listFiles();
        if (files == null) {
            // listFiles() returns null on I/O error or if dir is not a
            // directory; the original would have thrown an NPE here.
            return;
        }
        for (int i = 0; i < files.length; i++) {
            File f = files[i];
            if (f.isDirectory()) {
                indexDirectory(writer, f); // recurse into subdirectory
            } else if (f.getName().endsWith(".txt")) {
                indexTxtFile(writer, f);
            } else if (f.getName().endsWith(".doc")) {
                indexWordFile(writer, f);
            } else if (f.getName().endsWith(".xls")) {
                indexExcelFile(writer, f);
            }
        }
    }

    /**
     * Indexes an Excel 2003 (.xls) file by concatenating every cell's
     * contents, using the JExcelAPI (jxl) Workbook reader.
     */
    private static void indexExcelFile(IndexWriter writer, File f) {
        if (f.isHidden() || !f.exists() || !f.canRead()) {
            return;
        }
        Workbook rwb = null;
        InputStream is = null;
        try {
            is = new FileInputStream(f);
            rwb = Workbook.getWorkbook(is);
            // The first worksheet has index 0.
            Sheet st = rwb.getSheet(0);
            int rows = st.getRows();
            int cols = st.getColumns();
            System.out.println("当前工作表的名字:" + st.getName());
            System.out.println("总行数:" + rows);
            System.out.println("总列数:" + cols);
            // FIX: use StringBuilder instead of String += in a nested loop
            // (the original copied the whole string on every cell).
            StringBuilder content = new StringBuilder();
            for (int i = 0; i < rows; i++) {
                for (int j = 0; j < cols; j++) {
                    // getCell(column, row)
                    content.append(st.getCell(j, i).getContents());
                }
            }
            Document doc = new Document();
            doc.add(new Field("contents", content.toString(), Field.Store.YES, Field.Index.ANALYZED));
            doc.add(new Field("filename", f.getCanonicalPath(), Field.Store.YES, Field.Index.ANALYZED));
            writer.addDocument(doc);
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("出错了");
        } finally {
            // FIX: the original called rwb.close() unconditionally, which
            // throws NPE when Workbook.getWorkbook failed; the stream was
            // also never closed.
            if (rwb != null) {
                rwb.close();
            }
            if (is != null) {
                try {
                    is.close();
                } catch (IOException ignored) {
                    // best-effort cleanup
                }
            }
        }
    }

    /**
     * Indexes a Word 2003 (.doc) file. Text is extracted with
     * tm-extractors' WordExtractor (text only — formatting is ignored).
     */
    private static void indexWordFile(IndexWriter writer, File f) {
        if (f.isHidden() || !f.exists() || !f.canRead()) {
            return;
        }
        FileInputStream in = null;
        try {
            System.out.println("Indexing " + f.getCanonicalPath());
            in = new FileInputStream(f);
            String str = new WordExtractor().extractText(in);
            Document doc = new Document();
            doc.add(new Field("contents", str, Field.Store.YES, Field.Index.ANALYZED));
            doc.add(new Field("filename", f.getCanonicalPath(), Field.Store.YES, Field.Index.ANALYZED));
            writer.addDocument(doc);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // FIX: the original leaked the FileInputStream.
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // best-effort cleanup
                }
            }
        }
    }

    /**
     * Indexes a single .txt file, reading at most its first 30 lines.
     *
     * Field.Index choices (for reference):
     * ANALYZED - tokenize and index; NOT_ANALYZED - index as a single term
     * (e.g. author names, dates); NO - store only, not searchable;
     * NOT_ANALYZED_NO_NORMS / ANALYZED_NO_NORMS - same without norms.
     *
     * @param writer open IndexWriter the document is added to
     * @param f      the text file to index
     * @throws IOException on read failure
     */
    private static void indexTxtFile(IndexWriter writer, File f) throws IOException {
        if (f.isHidden() || !f.exists() || !f.canRead()) {
            return;
        }
        System.out.println("Indexing " + f.getCanonicalPath());
        // NOTE(review): FileReader uses the platform default charset —
        // confirm the .txt files are in that encoding.
        BufferedReader br = new BufferedReader(new FileReader(f));
        StringBuilder content = new StringBuilder();
        try {
            /*
             * BUG FIX: the original called br.readLine() twice per iteration
             * (once in the condition, once in the body), discarding every
             * other line and appending the string "null" when the second
             * call hit end-of-file.
             */
            String line;
            for (int i = 0; i < 30 && (line = br.readLine()) != null; i++) {
                content.append(line);
            }
        } finally {
            br.close(); // FIX: the original leaked the reader
        }
        Document doc = new Document();
        doc.add(new Field("contents", content.toString(), Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("filename", f.getCanonicalPath(), Field.Store.YES, Field.Index.ANALYZED));
        writer.addDocument(doc);
    }
}
搜索索引 Lucene 3.0(第二步)
package com.gjw.lecence;

import java.io.File;
import java.util.Date;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Step 2: search the index built by Indexer, with hit highlighting.
 *
 * Expects documents with the stored/analyzed fields "contents" and
 * "filename" as written by {@code Indexer}.
 *
 * @author RenWeigang
 * @version 2010.12.13
 */
public class Searcher {

    // Location of the index produced in step 1.
    private static String INDEX_DIR = "E:\\index";
    // Keyword to search for.
    private static String KEYWORD = "高军威";
    // Maximum number of hits collected.
    private static int TOP_NUM = 100;

    public static void main(String[] args) throws Exception {
        File indexDir = new File(INDEX_DIR);
        if (!indexDir.exists() || !indexDir.isDirectory()) {
            throw new Exception(indexDir + " does not exist or is not a directory.");
        }
        searchs(indexDir, KEYWORD);
    }

    /**
     * Multi-field query over "contents" and "filename", highlighting matches
     * in red font tags.
     *
     * @param indexDir index directory
     * @param q        query string
     * @throws Exception on index or query-parse errors
     */
    public static void searchs(File indexDir, String q) throws Exception {
        // IndexSearcher only needs the index directory; read-only mode.
        IndexSearcher indexSearch = new IndexSearcher(FSDirectory.open(indexDir), true); // read-only
        // FIX: one analyzer instance is enough — the original constructed a
        // fresh StandardAnalyzer for the parser and per highlighted field.
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
        // MultiFieldQueryParser(version, fields to search, analyzer)
        String[] fields = {"contents", "filename"};
        MultiFieldQueryParser parser =
                new MultiFieldQueryParser(Version.LUCENE_30, fields, analyzer);
        Query query = parser.parse(q);

        // --------------------- highlighting ---------------------
        // Formatter: wraps each matched term; QueryScorer tells the
        // highlighter which terms of the query to mark.
        Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
        QueryScorer scorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(formatter, scorer);
        // The per-hit summary fragment is at most 20 characters.
        Fragmenter fragmenter = new SimpleFragmenter(20);
        highlighter.setTextFragmenter(fragmenter);
        // ---------------------------------------------------------

        TopScoreDocCollector collector = TopScoreDocCollector.create(TOP_NUM, false);
        long start = new Date().getTime();
        indexSearch.search(query, collector);
        // topDocs().scoreDocs holds the matching doc ids with their scores.
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        System.out.println("找到了" + hits.length + "个");
        for (int i = 0; i < hits.length; i++) {
            float score = hits[i].score;
            Document doc = indexSearch.doc(hits[i].doc);
            String text = highlighter.getBestFragment(analyzer, "filename", doc.get("filename"));
            String text2 = highlighter.getBestFragment(analyzer, "contents", doc.get("contents"));
            // getBestFragment returns null when the field has no match —
            // fall back to the raw stored value.
            if (text == null) text = doc.get("filename");
            if (text2 == null) text2 = doc.get("contents");
            System.out.println(text + "------相关度得分" + score + "------"
                    + hits[i].toString() + "\n内容:" + text2);
        }
        indexSearch.close();
        long end = new Date().getTime();
        System.out.println("Found " + collector.getTotalHits()
                + " document(s) (in " + (end - start)
                + " milliseconds) that matched query '" + q + "':");
    }

    /**
     * Single-field query over "contents", highlighting with the default
     * &lt;B&gt; tags.
     *
     * @param indexDir index directory
     * @param q        query string
     * @throws Exception on index or query-parse errors
     */
    public static void search(File indexDir, String q) throws Exception {
        IndexSearcher indexSearch = new IndexSearcher(FSDirectory.open(indexDir), true); // read-only
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
        QueryParser parser = new QueryParser(Version.LUCENE_30, "contents", analyzer);
        TopScoreDocCollector collector = TopScoreDocCollector.create(TOP_NUM, false);
        long start = new Date().getTime();
        Query query = parser.parse(q);
        indexSearch.search(query, collector);

        // No-arg SimpleHTMLFormatter uses <B>...</B> around matches.
        Formatter formatter = new SimpleHTMLFormatter();
        QueryScorer scorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(formatter, scorer);
        Fragmenter fragmenter = new SimpleFragmenter(20);
        highlighter.setTextFragmenter(fragmenter);

        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        System.out.println("找到了" + hits.length + "个");
        for (int i = 0; i < hits.length; i++) {
            float score = hits[i].score;
            Document doc = indexSearch.doc(hits[i].doc);
            // Widen the fragment for display purposes.
            highlighter.setTextFragmenter(new SimpleFragmenter(200));
            String text = highlighter.getBestFragment(analyzer, "filename", doc.get("filename"));
            if (text == null) text = doc.get("filename");
            // BUG FIX: the stored field is named "contents", not "content" —
            // the original always printed null here.
            System.out.println(text + "------相关度得分" + score + "------"
                    + hits[i].toString() + "\n内容:" + doc.get("contents"));
        }
        indexSearch.close();
        long end = new Date().getTime();
        System.out.println("Found " + collector.getTotalHits()
                + " document(s) (in " + (end - start)
                + " milliseconds) that matched query '" + q + "':");
    }
}
对数据库创建索引及查询
建立数据库索引 lucene3.6
package com.gjw.DB;

import java.io.File;
import java.io.IOException;
import java.sql.ResultSet;
import java.sql.SQLException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;

/**
 * Builds a Lucene index (lucene 3.6+) over rows of the CEC_VENDOR table.
 * Each row becomes one Document with the stored/analyzed fields
 * vendor_name, vendor_address and vendor_id.
 */
public class DataBaseIndexer {

    public static void main(String[] args) throws IOException, SQLException {
        String indexDir = "e:\\index2";
        DBConn conn = new DBConn();
        conn.OpenConnection();
        ResultSet rs = conn.ExecuteQuery("select * from CEC_VENDOR");
        // create=true: rebuild the index; UNLIMITED: no per-field term cap.
        Directory dir = new SimpleFSDirectory(new File(indexDir));
        IndexWriter writer = new IndexWriter(dir,
                new StandardAnalyzer(Version.LUCENE_30), true,
                IndexWriter.MaxFieldLength.UNLIMITED);
        try {
            while (rs.next()) {
                Document doc = new Document();
                // FIX: guard against SQL NULL — Field's constructor throws
                // NullPointerException on a null value.
                doc.add(new Field("vendor_name", nonNull(rs.getString("vendor_name")),
                        Field.Store.YES, Field.Index.ANALYZED));
                doc.add(new Field("vendor_address", nonNull(rs.getString("vendor_address")),
                        Field.Store.YES, Field.Index.ANALYZED));
                doc.add(new Field("vendor_id", nonNull(rs.getString("vendor_id")),
                        Field.Store.YES, Field.Index.ANALYZED));
                writer.addDocument(doc);
            }
            System.out.println("numDocs" + writer.numDocs());
            writer.optimize(); // merge segments for faster searching
            writer.commit();
        } finally {
            writer.close(); // FIX: original leaked the writer (and its lock) on error
            // NOTE(review): DBConn exposes no visible close method here —
            // confirm whether the connection/ResultSet need explicit cleanup.
        }
    }

    // Maps a SQL NULL column value to the empty string.
    private static String nonNull(String s) {
        return s == null ? "" : s;
    }
}
搜索索引 Lucene 3.0(第二步)
package com.gjw.DB;

import java.io.File;
import java.util.Date;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Step 2 (database variant): search the index built by DataBaseIndexer,
 * with hit highlighting. The index holds the stored/analyzed fields
 * vendor_name, vendor_address and vendor_id.
 *
 * @author RenWeigang
 * @version 2010.12.13
 */
public class Searcher {

    // Location of the index produced by DataBaseIndexer.
    private static String INDEX_DIR = "E:\\index2";
    // Keyword to search for.
    private static String KEYWORD = "搜索关键字";
    // Maximum number of hits collected.
    private static int TOP_NUM = 100;

    public static void main(String[] args) throws Exception {
        File indexDir = new File(INDEX_DIR);
        if (!indexDir.exists() || !indexDir.isDirectory()) {
            throw new Exception(indexDir + " does not exist or is not a directory.");
        }
        searchs(indexDir, KEYWORD);
    }

    /**
     * Multi-field query over the three vendor fields, highlighting matches
     * in red font tags.
     *
     * @param indexDir index directory
     * @param q        query string
     * @throws Exception on index or query-parse errors
     */
    public static void searchs(File indexDir, String q) throws Exception {
        // IndexSearcher only needs the index directory; read-only mode.
        IndexSearcher indexSearch = new IndexSearcher(FSDirectory.open(indexDir), true); // read-only
        // FIX: reuse one analyzer instead of constructing several.
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
        String[] fields = {"vendor_name", "vendor_id", "vendor_address"};
        MultiFieldQueryParser parser =
                new MultiFieldQueryParser(Version.LUCENE_30, fields, analyzer);
        Query query = parser.parse(q);

        // --------------------- highlighting ---------------------
        // Formatter wraps matched terms; QueryScorer picks the terms to mark.
        Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
        QueryScorer scorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(formatter, scorer);
        // The per-hit summary fragment is at most 20 characters.
        Fragmenter fragmenter = new SimpleFragmenter(20);
        highlighter.setTextFragmenter(fragmenter);
        // ---------------------------------------------------------

        TopScoreDocCollector collector = TopScoreDocCollector.create(TOP_NUM, false);
        long start = new Date().getTime();
        indexSearch.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        System.out.println("找到了" + hits.length + "个");
        for (int i = 0; i < hits.length; i++) {
            float score = hits[i].score;
            Document doc = indexSearch.doc(hits[i].doc);
            String text = highlighter.getBestFragment(analyzer, "vendor_name", doc.get("vendor_name"));
            String text2 = highlighter.getBestFragment(analyzer, "vendor_id", doc.get("vendor_id"));
            // Fall back to the raw stored value when the field had no match.
            if (text == null) text = doc.get("vendor_name");
            if (text2 == null) text2 = doc.get("vendor_id");
            System.out.println(text + "------相关度得分" + score + "------"
                    + hits[i].toString() + "\n内容:" + text2);
        }
        indexSearch.close();
        long end = new Date().getTime();
        System.out.println("Found " + collector.getTotalHits()
                + " document(s) (in " + (end - start)
                + " milliseconds) that matched query '" + q + "':");
    }

    /**
     * Single-field query over "vendor_name", highlighting with the default
     * &lt;B&gt; tags.
     *
     * @param indexDir index directory
     * @param q        query string
     * @throws Exception on index or query-parse errors
     */
    public static void search(File indexDir, String q) throws Exception {
        IndexSearcher indexSearch = new IndexSearcher(FSDirectory.open(indexDir), true); // read-only
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
        /*
         * BUG FIX: this method was copied from the file searcher and still
         * queried "contents", highlighted "filename" and printed "content" —
         * none of those fields exist in this index (the fields are
         * vendor_name / vendor_address / vendor_id), so it could never match
         * or display anything. It now targets the vendor fields.
         */
        QueryParser parser = new QueryParser(Version.LUCENE_30, "vendor_name", analyzer);
        TopScoreDocCollector collector = TopScoreDocCollector.create(TOP_NUM, false);
        long start = new Date().getTime();
        Query query = parser.parse(q);
        indexSearch.search(query, collector);

        // No-arg SimpleHTMLFormatter uses <B>...</B> around matches.
        Formatter formatter = new SimpleHTMLFormatter();
        QueryScorer scorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(formatter, scorer);
        Fragmenter fragmenter = new SimpleFragmenter(20);
        highlighter.setTextFragmenter(fragmenter);

        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        System.out.println("找到了" + hits.length + "个");
        for (int i = 0; i < hits.length; i++) {
            float score = hits[i].score;
            Document doc = indexSearch.doc(hits[i].doc);
            // Widen the fragment for display purposes.
            highlighter.setTextFragmenter(new SimpleFragmenter(200));
            String text = highlighter.getBestFragment(analyzer, "vendor_name", doc.get("vendor_name"));
            if (text == null) text = doc.get("vendor_name");
            System.out.println(text + "------相关度得分" + score + "------"
                    + hits[i].toString() + "\n内容:" + doc.get("vendor_address"));
        }
        indexSearch.close();
        long end = new Date().getTime();
        System.out.println("Found " + collector.getTotalHits()
                + " document(s) (in " + (end - start)
                + " milliseconds) that matched query '" + q + "':");
    }
}
对文件索引的查询和对数据库索引的查询是一样的,区别只在于索引中字段的名称不同。
下面看看怎么删除指定索引
Lucene 3.0+ 删除索引
package com.gjw.lecence;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;

/**
 * Lucene 3.0+ index deletion: removes every Document whose "filename"
 * field contains a given term, then reports the before/after counts.
 *
 * @author Administrator
 */
public class DeleteIndex {

    public static void main(String[] args) throws CorruptIndexException, IOException {
        // Directory holding the existing index.
        String indexDir = "E:\\index";
        // create=false: open the EXISTING index for modification; true would
        // wipe it and start over.
        Directory dir = new SimpleFSDirectory(new File(indexDir));
        IndexWriter indexWriter = new IndexWriter(dir,
                new StandardAnalyzer(Version.LUCENE_30), false,
                IndexWriter.MaxFieldLength.UNLIMITED);
        try {
            /*
             * A Term names (field, keyword); deleteDocuments removes every
             * Document containing that exact term in that field.
             */
            Term term = new Term("filename", "6667");
            indexWriter.deleteDocuments(term);
            // Without optimize(), deletions are only masked: maxDoc() still
            // counts the deleted documents while numDocs() does not.
            //indexWriter.optimize();
            indexWriter.commit();
            System.out.println("是否有删除=" + indexWriter.hasDeletions());
            System.out.println("一共有" + indexWriter.maxDoc() + "索引");
            System.out.println("还剩" + indexWriter.numDocs() + "索引");
        } finally {
            // FIX: original left the writer (and the index write lock) open
            // if anything above threw.
            indexWriter.close();
        }
    }
}
在此做个分隔记号,便于后文查找。