Lucene 入门示范
Lucene 入门示例
通过敲写这两个例子，大概了解了 Lucene 的核心类以及主要 API 的功能。
package Demo;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Command-line demo that indexes all readable files under a directory.
 *
 * <p>The documents directory is taken from the first command-line argument; the index is
 * written to the relative directory {@code index}.
 */
public class IndexFiles {

    private IndexFiles() {
    }

    /**
     * Indexes every readable file below the directory named by {@code args[0]}.
     *
     * <p>Exits with status 1 when no path is supplied or when the path does not exist or is
     * not readable. An {@link IOException} raised while indexing is reported on standard
     * output rather than propagated.
     *
     * @param args command-line arguments; {@code args[0]} is the documents directory
     */
    public static void main(String[] args) {
        // Describes what this program actually accepts (one positional argument).
        String usage = "java Demo.IndexFiles DOCS_PATH\n\n"
                + "This indexes the documents in DOCS_PATH, creating a Lucene index "
                + "in the 'index' directory that can be searched with Searchfiles";
        String indexPath = "index";
        boolean create = true;

        // Check the argument count before touching args[0]; an empty array would
        // otherwise fail with ArrayIndexOutOfBoundsException.
        if (args == null || args.length == 0 || args[0] == null) {
            System.err.println("Usage: " + usage);
            System.exit(1);
        }
        String docsPath = args[0];

        final File docDir = new File(docsPath);
        if (!docDir.exists() || !docDir.canRead()) {
            System.out.println("Document directory '" + docDir.getAbsolutePath()
                    + "' does not exist or is not readable, please check the path");
            System.exit(1);
        }

        Date start = new Date();
        try {
            Directory dir = FSDirectory.open(new File(indexPath));
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);
            // CREATE replaces any existing index; CREATE_OR_APPEND adds to one if present.
            iwc.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);

            IndexWriter writer = new IndexWriter(dir, iwc);
            try {
                indexDocs(writer, docDir);
            } finally {
                // Close the writer even when indexing fails part-way through.
                writer.close();
            }

            Date end = new Date();
            System.out.println((end.getTime() - start.getTime()) + " total milliseconds");
        } catch (IOException e) {
            System.out.println("caught a " + e.getClass() + "\n with message:" + e.getMessage());
        }
    }

    /**
     * Indexes {@code file}, recursing into it when it is a directory.
     *
     * <p>Files and directories for which {@link File#canRead()} is false are skipped. Each
     * regular file becomes one document with a stored {@code path} field, an unstored
     * {@code modified} field and a {@code contents} field read from the file as UTF-8.
     *
     * @param writer writer for the index the documents are added to
     * @param file a file, or a directory whose entries are indexed recursively
     * @throws IOException if a file cannot be opened or the writer reports an I/O error
     */
    static void indexDocs(IndexWriter writer, File file) throws IOException {
        if (!file.canRead()) {
            return;
        }

        if (file.isDirectory()) {
            String[] files = file.list();
            // list() returns null when the directory cannot be listed.
            if (files != null) {
                for (String child : files) {
                    indexDocs(writer, new File(file, child));
                }
            }
            return;
        }

        FileInputStream fis = new FileInputStream(file);
        try {
            // Make a new, empty document.
            Document doc = new Document();

            Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
            doc.add(pathField);
            doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));
            doc.add(new TextField("contents",
                    new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

            if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                // New index, so just add the document (no old document can be there).
                System.out.println("adding " + file);
                writer.addDocument(doc);
            } else {
                // Existing index (an old copy of the document may have been indexed),
                // so use updateDocument to replace the one matching the exact path, if present.
                System.out.println("updating " + file);
                writer.updateDocument(new Term("path", file.getPath()), doc);
            }
        } finally {
            fis.close();
        }
    }
}
package Demo;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.Buffer;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.xml.sax.InputSource;

/**
 * Command-line demo that runs queries against the index in the relative directory
 * {@code index} and pages through the hits on the console.
 */
public class Searchfiles {

    /**
     * Reads queries (from standard input with the settings hard-coded below), searches the
     * {@code contents} field and prints the matching documents page by page.
     *
     * <p>The loop ends on end of input or on an empty line.
     *
     * @param args command-line arguments; not used
     * @throws IOException if the index or the query input cannot be read
     * @throws ParseException if a query line cannot be parsed
     */
    public static void main(String[] args) throws IOException, ParseException {
        String index = "index";
        String field = "contents";
        String queries = null;
        int repeat = 0;
        boolean raw = false;
        String queryString = null;
        int hitsPerPage = 10;

        // Open the directory that holds the index.
        IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
        try {
            // Build a searcher over the index opened by the reader.
            IndexSearcher searcher = new IndexSearcher(reader);
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);

            BufferedReader in;
            if (queries != null) {
                in = new BufferedReader(
                        new InputStreamReader(new FileInputStream(queries), "UTF-8"));
            } else {
                in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
            }

            QueryParser parser = new QueryParser(Version.LUCENE_40, field, analyzer);
            while (true) {
                if (queries == null && queryString == null) {
                    // Prompt the user only in interactive mode.
                    System.out.println("Enter query: ");
                }

                String line = queryString != null ? queryString : in.readLine();
                if (line == null) {
                    break; // end of input
                }
                line = line.trim();
                if (line.length() == 0) {
                    break;
                }

                Query query = parser.parse(line);
                System.out.println("Searching for : " + query.toString(field));

                if (repeat > 0) {
                    // Repeat the search and time it as a benchmark.
                    Date start = new Date();
                    for (int i = 0; i < repeat; i++) {
                        searcher.search(query, null, 100);
                    }
                    Date end = new Date();
                    System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms.");
                }

                doPagingSearch(in, searcher, query, hitsPerPage, raw,
                        queries == null && queryString == null);

                // A fixed query string is run exactly once; otherwise keep reading queries.
                if (queryString != null) {
                    break;
                }
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Runs {@code query} and prints the hits one page at a time.
     *
     * <p>Initially collects enough hits for five pages. In interactive mode the user may ask
     * for all hits to be collected and may move to the previous or next page, jump to a page
     * number, or quit. In non-interactive mode only the first page is printed.
     *
     * @param in reader the user's paging commands are read from
     * @param searcher searcher used to execute the query and load documents
     * @param query the query to run
     * @param hitsPerPage number of hits printed per page
     * @param raw when true, prints document id and score instead of the stored fields
     * @param interactive when true, prompts for paging commands after each page
     * @throws IOException if reading from {@code in} or from the index fails
     */
    public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query,
            int hitsPerPage, boolean raw, boolean interactive) throws IOException {
        // Collect enough docs to show 5 pages.
        TopDocs results = searcher.search(query, 5 * hitsPerPage);
        ScoreDoc[] hits = results.scoreDocs;

        int numTotalHits = results.totalHits;
        System.out.println(numTotalHits + " total matching documents");

        int start = 0;
        int end = Math.min(numTotalHits, hitsPerPage);

        while (true) {
            if (end > hits.length) {
                System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                        + " total matching documents collected.");
                System.out.println("Collect more (y/n)?");
                String line = in.readLine();
                // readLine() returns null at end of input; treat that like "no".
                if (line == null || line.length() == 0 || line.charAt(0) == 'n') {
                    break;
                }
                hits = searcher.search(query, numTotalHits).scoreDocs;
            }

            end = Math.min(hits.length, start + hitsPerPage);

            for (int i = start; i < end; i++) {
                if (raw) {
                    // Output raw format.
                    System.out.println("doc=" + hits[i].doc + " score= " + hits[i].score);
                    continue;
                }

                Document doc = searcher.doc(hits[i].doc);
                String path = doc.get("path");
                if (path != null) {
                    System.out.println((i + 1) + "." + path);
                    String title = doc.get("title");
                    if (title != null) {
                        System.out.println(" Title:" + title);
                    }
                } else {
                    System.out.println((i + 1) + "." + "No path for this document");
                }
            }

            if (!interactive || end == 0) {
                break;
            }

            if (numTotalHits >= end) {
                boolean quit = false;
                while (true) {
                    System.out.print("Press ");
                    if (start - hitsPerPage >= 0) {
                        System.out.print("(p)revious page, ");
                    }
                    if (start + hitsPerPage < numTotalHits) {
                        System.out.print("(n)ext page, ");
                    }
                    System.out.print("(q)uit or enter number to jump to a page.");

                    String line = in.readLine();
                    // End of input is treated like an explicit quit.
                    if (line == null || line.length() == 0 || line.charAt(0) == 'q') {
                        quit = true;
                        break;
                    }
                    if (line.charAt(0) == 'p') {
                        start = Math.max(0, start - hitsPerPage);
                        break;
                    } else if (line.charAt(0) == 'n') {
                        if (start + hitsPerPage < numTotalHits) {
                            start += hitsPerPage;
                        }
                        break;
                    } else {
                        int page;
                        try {
                            page = Integer.parseInt(line.trim());
                        } catch (NumberFormatException e) {
                            // Not a command and not a number: ask again instead of crashing.
                            System.out.println("No such page!");
                            continue;
                        }
                        // Pages are 1-based; the long multiplication avoids int overflow.
                        if (page >= 1 && (long) (page - 1) * hitsPerPage < numTotalHits) {
                            start = (page - 1) * hitsPerPage;
                            break;
                        } else {
                            System.out.println("No such page!");
                        }
                    }
                }
                if (quit) {
                    break;
                }
                end = Math.min(numTotalHits, start + hitsPerPage);
            }
        }
    }
}