Lucene 入门示范

Lucene 入门示例

通过敲写这两个例子,大概了解了 Lucene 的核心类以及主要 API 的功能。
package Demo;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Command-line demo that indexes every readable file under a directory into a
 * Lucene index stored in the local directory {@code index}.
 *
 * <p>The single required argument is the path of the file or directory to index.
 */
public class IndexFiles {

	/** Usage text shown when the required DOCS_PATH argument is missing. */
	private static final String USAGE = "java Demo.IndexFiles DOCS_PATH\n\n"
			+ "This indexes the documents in DOCS_PATH, creating a Lucene index "
			+ "in the directory 'index' that can be searched with Searchfiles";

	/** Not instantiable: this class only hosts static entry points. */
	private IndexFiles(){}

	/**
	 * Indexes all files under the directory given as the first argument.
	 *
	 * <p>Exits with status 1 when the argument is missing or the path does not
	 * exist / is not readable. An {@link IOException} raised while indexing is
	 * reported on standard output.
	 *
	 * @param args {@code args[0]} is the path of the documents to index
	 */
	public static void main(String[] args) {
		// Validate BEFORE touching args[0]; reading it first would throw
		// ArrayIndexOutOfBoundsException when no argument is supplied.
		if (args == null || args.length == 0 || args[0] == null) {
			System.err.println("input the docsPath");
			System.err.println("Usage: " + USAGE);
			System.exit(1);
		}

		String indexPath = "index";
		String docsPath = args[0];
		// true: always rebuild the index from scratch; false: update an existing one.
		boolean create = true;

		final File docDir = new File(docsPath);
		if (!docDir.exists() || !docDir.canRead()) {
			System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path");
			System.exit(1);
		}

		long startMillis = System.currentTimeMillis();
		try {
			Directory dir = FSDirectory.open(new File(indexPath));
			try {
				Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
				IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);
				if (create) {
					iwc.setOpenMode(OpenMode.CREATE);
				} else {
					iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
				}

				IndexWriter writer = new IndexWriter(dir, iwc);
				try {
					indexDocs(writer, docDir);
				} finally {
					// Close the writer even when indexing fails part-way through.
					writer.close();
				}
			} finally {
				dir.close();
			}

			long elapsedMillis = System.currentTimeMillis() - startMillis;
			System.out.println(elapsedMillis + " total milliseconds");
		} catch (IOException e) {
			System.out.println("caught a " + e.getClass() +
					"\n with message:" + e.getMessage());
		}
	}

	/**
	 * Indexes the given file, or recursively every file below it when it is a
	 * directory. Files and directories that cannot be read are skipped.
	 *
	 * <p>Each indexed document gets three fields: {@code path} (stored),
	 * {@code modified} (the file's last-modified time, not stored) and
	 * {@code contents} (the file content, read as UTF-8).
	 *
	 * @param writer writer for the index the documents are added to
	 * @param file   a file to index, or a directory to walk
	 * @throws IOException if reading a file or writing to the index fails
	 */
	static void indexDocs(IndexWriter writer, File file) throws IOException
	{
		if (!file.canRead()) {
			return;
		}

		if (file.isDirectory()) {
			// list() returns null on an I/O error; treat that as "nothing to index".
			String[] children = file.list();
			if (children != null) {
				for (int i = 0; i < children.length; i++) {
					indexDocs(writer, new File(file, children[i]));
				}
			}
			return;
		}

		FileInputStream fis;
		try {
			fis = new FileInputStream(file);
		} catch (java.io.FileNotFoundException e) {
			// canRead() is not a guarantee that the file can be opened; skip the
			// file rather than aborting the whole indexing run.
			System.err.println("skipping " + file + ": " + e.getMessage());
			return;
		}

		try {
			// Make a new, empty document.
			Document doc = new Document();
			Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
			doc.add(pathField);

			doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));
			doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

			if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
				// New index, so we just add the document (no old document can be there).
				System.out.println("adding " + file);
				writer.addDocument(doc);
			} else {
				// Existing index (an old copy of the document may have been indexed),
				// so use updateDocument to replace the old one matching the exact
				// path, if present.
				System.out.println("updating " + file);
				writer.updateDocument(new Term("path", file.getPath()), doc);
			}
		} finally {
			fis.close();
		}
	}
}


package Demo;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.Buffer;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.xml.sax.InputSource;

/**
 * Command-line demo that runs queries typed on standard input against the
 * Lucene index stored in the local directory {@code index} and pages through
 * the results.
 */
public class Searchfiles {

	/**
	 * Opens the index, then repeatedly reads a query, runs it and shows the
	 * results page by page. An empty line or end of input ends the session.
	 *
	 * @param args ignored; all settings are fixed in the method body
	 * @throws IOException if the index or the query input cannot be read
	 * @throws ParseException if a query line cannot be parsed
	 */
	public static void main(String[] args) throws IOException, ParseException {
		String index = "index";

		String field = "contents";
		// Path of a file with one query per line; null means read from stdin.
		String queries = null;

		// When > 0, each query is additionally run this many times and timed.
		int repeat = 0;
		boolean raw = false;

		// A single fixed query to run once; null means prompt for queries.
		String queryString = null;

		int hitsPerPage = 10;

		// Open the directory that holds the index.
		FSDirectory dir = FSDirectory.open(new File(index));
		try {
			IndexReader reader = DirectoryReader.open(dir);
			try {
				// Build the searcher on top of the index opened by the reader.
				IndexSearcher searcher = new IndexSearcher(reader);
				Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);

				BufferedReader in;
				if (queries != null) {
					in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8"));
				} else {
					in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
				}
				QueryParser parser = new QueryParser(Version.LUCENE_40, field, analyzer);

				while (true) {
					if (queries == null && queryString == null) {
						System.out.println("Enter query: ");
					}

					String line = queryString != null ? queryString : in.readLine();

					// null means end of input.
					if (line == null) {
						break;
					}

					line = line.trim();
					if (line.length() == 0) {
						break;
					}

					Query query = parser.parse(line);
					System.out.println("Searching for : " + query.toString(field));

					if (repeat > 0) { // repeat & time as benchmark
						Date start = new Date();
						for (int i = 0; i < repeat; i++) {
							searcher.search(query, null, 100);
						}
						Date end = new Date();
						System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms.");
					}

					doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null);

					// A fixed query is run exactly once; otherwise keep prompting.
					if (queryString != null) {
						break;
					}
				}
			} finally {
				reader.close();
			}
		} finally {
			dir.close();
		}
	}

	/**
	 * Runs {@code query} and prints the hits one page at a time.
	 *
	 * <p>Initially enough hits for five pages are collected. In interactive mode
	 * the user may move to the previous/next page, jump to a page number, or
	 * quit; when a page beyond the collected hits is requested the user is asked
	 * whether to collect all hits. In non-interactive mode only the first page
	 * is printed. End of input on {@code in} is treated like quitting.
	 *
	 * @param in          source of the user's paging commands
	 * @param searcher    searcher used to run the query
	 * @param query       the query to run
	 * @param hitsPerPage number of hits shown per page
	 * @param raw         when true print only doc id and score for each hit
	 * @param interactive when true prompt for paging commands after each page
	 * @throws IOException if searching or reading a command fails
	 */
	public static void doPagingSearch(BufferedReader in , IndexSearcher searcher,
			     Query query,int hitsPerPage,boolean raw ,boolean interactive ) throws IOException
	{
		// Collect enough docs to show 5 pages.
		TopDocs results = searcher.search(query, 5 * hitsPerPage);
		ScoreDoc[] hits = results.scoreDocs;

		int numTotalHits = results.totalHits;
		System.out.println(numTotalHits + " total matching documents");

		int start = 0;
		int end = Math.min(numTotalHits, hitsPerPage);

		while (true) {
			if (end > hits.length) {
				System.out.println("Only results 1 - " + hits.length
						+ " of " + numTotalHits + " total matching documents collected.");
				System.out.println("Collect more (y/n)?");
				String line = in.readLine();

				if (line == null || line.length() == 0 || line.charAt(0) == 'n') {
					break;
				}
				hits = searcher.search(query, numTotalHits).scoreDocs;
			}

			end = Math.min(hits.length, start + hitsPerPage);
			for (int i = start; i < end; i++) {
				if (raw) { // output raw format
					System.out.println("doc=" + hits[i].doc + " score= " + hits[i].score);
					continue;
				}

				Document doc = searcher.doc(hits[i].doc);
				String path = doc.get("path");

				if (path != null) {
					System.out.println((i + 1) + "." + path);
					String title = doc.get("title");
					if (title != null) {
						System.out.println("  Title:" + title);
					}
				} else {
					System.out.println((i + 1) + "." + "No path for this document");
				}
			}

			if (!interactive || end == 0) {
				break;
			}

			if (numTotalHits >= end) {
				boolean quit = false;
				while (true) {
					System.out.print("Press ");
					if (start - hitsPerPage >= 0) {
						System.out.print("(p)revious page, ");
					}
					if (start + hitsPerPage < numTotalHits) {
						System.out.print("(n)ext page, ");
					}
					System.out.println("(q)uit or enter number to jump to a page.");

					String line = in.readLine();
					if (line == null || line.length() == 0 || line.charAt(0) == 'q') {
						quit = true;
						break;
					}

					char command = line.charAt(0);
					if (command == 'p') {
						start = Math.max(0, start - hitsPerPage);
						break;
					}
					if (command == 'n') {
						if (start + hitsPerPage < numTotalHits) {
							start += hitsPerPage;
						}
						break;
					}

					// Anything else must be a 1-based page number.
					int page;
					try {
						page = Integer.parseInt(line.trim());
					} catch (NumberFormatException e) {
						System.out.println("No such page!");
						continue;
					}
					// page < 1 would produce a negative start index.
					if (page >= 1 && (page - 1) * hitsPerPage < numTotalHits) {
						start = (page - 1) * hitsPerPage;
						break;
					}
					System.out.println("No such page!");
				}
				if (quit) {
					break;
				}
				end = Math.min(numTotalHits, start + hitsPerPage);
			}
		}
	}
}