lucene案例-blog
本demo功能:
1)新增博客,并添加lucene索引;以及更新、删除博客(同时维护索引);以及通过lucene索引搜索博客;
2)添加lucene索引时使用lucene-analyzers-smartcn中文分词,搜索的结果进行高亮显示。
demo结构
pom.xml
<?xml version="1.0" encoding="UTF-8"?> <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <parent> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-starter-parent</artifactId> <version>2.1.1.RELEASE</version> <relativePath /> <!-- lookup parent from repository --> </parent> <groupId>com.oy</groupId> <artifactId>blog</artifactId> <version>1.0.0</version> <packaging>jar</packaging> <name>blog-demo</name> <description>Demo project for Spring Boot</description> <properties> <java.version>1.8</java.version> </properties> <dependencies> <dependency> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-starter-web</artifactId> </dependency> <dependency> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-starter-tomcat</artifactId> <scope>provided</scope> </dependency> <dependency> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-starter-thymeleaf</artifactId> </dependency> <dependency> <groupId>org.mybatis.spring.boot</groupId> <artifactId>mybatis-spring-boot-starter</artifactId> <version>1.3.2</version> </dependency> <dependency> <groupId>mysql</groupId> <artifactId>mysql-connector-java</artifactId> <version>5.1.36</version> </dependency> <dependency> <groupId>commons-lang</groupId> <artifactId>commons-lang</artifactId> <version>2.5</version> </dependency> <dependency> <groupId>com.alibaba</groupId> <artifactId>fastjson</artifactId> <version>1.2.75</version> </dependency> <!-- 百度编辑器ueditor --> <dependency> <groupId>commons-fileupload</groupId> <artifactId>commons-fileupload</artifactId> <version>1.3.1</version> </dependency> <!-- lucene --> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-core</artifactId> <version>5.3.1</version> </dependency> <dependency> <groupId>org.apache.lucene</groupId> 
<artifactId>lucene-analyzers-common</artifactId> <version>5.3.1</version> </dependency> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-analyzers-smartcn</artifactId> <version>5.3.1</version> </dependency> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-queryparser</artifactId> <version>5.3.1</version> </dependency> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-highlighter</artifactId> <version>5.3.1</version> </dependency> </dependencies> <build> <plugins> <plugin> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-maven-plugin</artifactId> </plugin> </plugins> </build> </project>
application.properties
server.port=80
server.servlet.context-path=/
logging.level.root=info
logging.file=d:/logs/boot-demo.log
#datasource
spring.datasource.driver-class-name=com.mysql.jdbc.Driver
spring.datasource.url=jdbc:mysql://127.0.0.1:3306/db_blog?useUnicode=true&characterEncoding=utf8&serverTimezone=GMT%2B8
spring.datasource.username=root
spring.datasource.password=
spring.datasource.tomcat.min-idle=5
##################### MyBatis相关配置 [start] #####################
#MyBatis映射文件
mybatis.mapper-locations=classpath:com/oy/mapping/*.xml
#扫描生成实体的别名,需要和注解@Alias联合使用
mybatis.type-aliases-package=com.oy.entity
#MyBatis配置文件,当你的配置比较复杂的时候,可以使用
#mybatis.config-location=
#级联延迟加载。true:开启延迟加载
mybatis.configuration.lazy-loading-enabled=true
#积极的懒加载。false:按需加载
mybatis.configuration.aggressive-lazy-loading=false
##################### MyBatis相关配置 [end] ######################
# 博客索引库目录
indexDir=D:/blogLuceneIndexDir
# 通过关键字查询博客索引库,对结果分页展示时每页的记录数
blogLuceneIndexShowRows=10
sql.txt
CREATE TABLE `blog` ( `id` int(11) NOT NULL AUTO_INCREMENT, `title` varchar(200) DEFAULT NULL, `summary` varchar(400) DEFAULT NULL, `releaseDate` datetime DEFAULT NULL, `content` text, `keyWord` varchar(200) DEFAULT NULL, `contentNoTag` text, PRIMARY KEY (`id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
BlogController:新增博客,并添加lucene索引;以及更新、删除博客(同时维护索引);以及通过lucene索引搜索博客
package com.oy.controller;

import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.servlet.config.annotation.ViewControllerRegistry;
import org.springframework.web.servlet.config.annotation.WebMvcConfigurer;

import com.oy.entity.Blog;
import com.oy.lucene.BlogIndex;
import com.oy.service.BlogService;
import com.oy.util.StringUtil;

/**
 * Web endpoints for blogs: list, create, edit, delete, fetch by id, and
 * keyword search backed by the Lucene index.
 *
 * <p>Create, update and delete go through {@link BlogService} first and then
 * apply the same change to the Lucene index via {@link BlogIndex}.
 *
 * @author oy
 * @version 1.0
 * @date 2021-01-24
 * @time 17:04:21
 */
@Controller
@RequestMapping("/blog")
public class BlogController implements WebMvcConfigurer {

    /** Lucene index directory, injected from the {@code indexDir} property. */
    @Value("${indexDir}")
    private String indexDir;

    /**
     * Number of search results shown per page, injected from the
     * {@code blogLuceneIndexShowRows} property.
     */
    @Value("${blogLuceneIndexShowRows}")
    private Integer rows;

    @Autowired
    private BlogService blogService;

    /**
     * Maps {@code /page/blog/save} straight to the {@code blog/save} view
     * without going through a handler method.
     */
    @Override
    public void addViewControllers(ViewControllerRegistry registry) {
        registry.addViewController("/page/blog/save").setViewName("blog/save");
    }

    /**
     * Shows the blog list.
     *
     * @param model receives the result of {@code blogService.findBlog} (called
     *              with an empty query map) under {@code blogList}
     * @return the {@code blog/list} view name
     */
    @GetMapping("/list")
    public String list(Model model) {
        Map<String, Object> queryInfo = new HashMap<>();
        model.addAttribute("blogList", blogService.findBlog(queryInfo));
        return "blog/list";
    }

    /**
     * Loads a blog and opens the edit form for it.
     *
     * @param id    id of the blog to edit
     * @param model receives the loaded blog under {@code blog}
     * @return the {@code blog/save} view name
     */
    @GetMapping("/preEdit/{id}")
    public String preEdit(@PathVariable Integer id, Model model) {
        Blog blog = blogService.findById(id);
        model.addAttribute("blog", blog);
        return "blog/save";
    }

    /**
     * Creates or updates a blog and applies the same change to the Lucene index.
     *
     * <p>A blog that already has an id is updated; otherwise its release date
     * is set to the current time and it is added as a new blog.
     *
     * <p>NOTE(review): this handler returns no view name, so the rendered view
     * is left to Spring's default resolution — confirm that is intended.
     *
     * @param blog  form-bound blog
     * @param model unused
     * @throws Exception if the service call or the index update fails
     */
    @PostMapping("/save")
    public void save(Blog blog, Model model) throws Exception {
        BlogIndex blogIndex = new BlogIndex(indexDir);
        if (blog.getId() != null) {
            // Existing blog: update the record, then replace its index entry.
            blogService.update(blog);
            blogIndex.updateIndex(blog);
        } else {
            // New blog: stamp the release date, store it, then index it.
            blog.setReleaseDate(new Date());
            blogService.add(blog);
            blogIndex.addIndex(blog);
        }
    }

    /**
     * Deletes a blog and removes its entry from the Lucene index.
     *
     * @param id    id of the blog to delete
     * @param model unused
     * @return a redirect to the blog list
     * @throws Exception if the index update fails
     */
    @GetMapping("/del/{id}")
    public String del(@PathVariable Integer id, Model model) throws Exception {
        blogService.deleteById(id);
        BlogIndex blogIndex = new BlogIndex(indexDir);
        blogIndex.deleteIndex("" + id);
        return "redirect:/blog/list";
    }

    /**
     * Returns a single blog as the response body.
     *
     * @param id    id of the blog to fetch
     * @param model unused
     * @return whatever {@code blogService.findById} returns for the id
     */
    @GetMapping("/{id}")
    @ResponseBody
    public Blog findById(@PathVariable Integer id, Model model) {
        return blogService.findById(id);
    }

    // ==================================================================

    /**
     * Searches blogs by keyword through the Lucene index and shows one page of
     * the results.
     *
     * <p>The page size is the configured {@code blogLuceneIndexShowRows} value.
     * A missing, non-numeric or non-positive {@code page} is treated as page 1,
     * and a page beyond the last result yields an empty page instead of an
     * error.
     *
     * @param q     search keyword; when empty the request is redirected to the
     *              blog list
     * @param page  1-based page number, optional
     * @param model receives {@code blogList} (the current page), {@code q} and
     *              {@code resultTotal} (total number of hits)
     * @return the {@code blog/query} view name, or a redirect to the blog list
     * @throws Exception if the index search fails
     */
    @RequestMapping("/q")
    public String search(@RequestParam(value = "q", required = true) String q,
            @RequestParam(value = "page", required = false) String page, Model model) throws Exception {
        if (StringUtil.isEmpty(q)) {
            return "redirect:/blog/list";
        }

        BlogIndex blogIndex = new BlogIndex(indexDir);
        List<Blog> blogList = blogIndex.searchBlog(q);
        System.out.println("据关键字查询相关博客信息, blogList:" + blogList);

        // Slice the full hit list down to the requested page. The offset is
        // computed in long arithmetic so a huge page number cannot overflow,
        // and both bounds are clamped to the list size so subList never sees
        // start > end or an out-of-range index.
        int total = blogList.size();
        int pageSize = Math.max(1, rows);
        long offset = (long) (parsePageNumber(page) - 1) * pageSize;
        int start = (int) Math.min(offset, total);
        int end = (int) Math.min(offset + pageSize, total);
        // subList returns the half-open range [start, end).
        List<Blog> pageOfBlogs = blogList.subList(start, end);

        model.addAttribute("blogList", pageOfBlogs);
        model.addAttribute("q", q);
        model.addAttribute("resultTotal", total);
        return "blog/query";
    }

    /**
     * Converts the raw {@code page} request parameter to a 1-based page number.
     *
     * @param page raw parameter value, may be null or empty
     * @return the parsed page number, or 1 when the value is missing, not a
     *         number, or less than 1
     */
    private static int parsePageNumber(String page) {
        if (StringUtil.isEmpty(page)) {
            return 1;
        }
        try {
            return Math.max(1, Integer.parseInt(page.trim()));
        } catch (NumberFormatException e) {
            // A malformed page number falls back to the first page.
            return 1;
        }
    }
}
BlogIndex:操作lucene索引
package com.oy.lucene;

import java.io.StringReader;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import com.oy.entity.Blog;
import com.oy.util.DateUtil;
import com.oy.util.StringUtil;

/**
 * Maintains and searches the Lucene index of blogs.
 *
 * <p>Each blog is indexed as one document with the fields {@code id},
 * {@code title}, {@code releaseDate} and {@code content}; text is analyzed
 * with {@link SmartChineseAnalyzer}.
 *
 * @author oy
 * @version 1.0
 * @date 2018-12-05
 * @time 16:22:55
 */
public class BlogIndex {

    /** Format used to store and read back the {@code releaseDate} field. */
    private static final String DATE_PATTERN = "yyyy-MM-dd HH:mm:ss";

    /** Maximum number of hits requested from a search. */
    private static final int MAX_HITS = 100;

    /** Number of leading content characters returned when nothing was highlighted. */
    private static final int FALLBACK_CONTENT_LENGTH = 2000;

    /** Index directory path. */
    private final String indexDir;

    /**
     * Creates an index helper for the given directory.
     *
     * @param indexDir path of the index directory
     * @throws Exception never thrown by this implementation; kept for
     *                   compatibility with existing callers
     */
    public BlogIndex(String indexDir) throws Exception {
        this.indexDir = indexDir;
    }

    /**
     * Opens a new {@link IndexWriter} on the index directory, configured with
     * {@link SmartChineseAnalyzer}.
     *
     * <p>The caller owns the returned writer and must close it, together with
     * the directory returned by {@link IndexWriter#getDirectory()}.
     *
     * @return a newly opened writer
     * @throws Exception if the directory or the writer cannot be opened
     */
    public IndexWriter getIndexWriter() throws Exception {
        Directory dir = openDirectory();
        try {
            return new IndexWriter(dir, new IndexWriterConfig(new SmartChineseAnalyzer()));
        } catch (Exception e) {
            // Do not leak the directory handle when the writer cannot be created.
            try {
                dir.close();
            } catch (Exception closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /**
     * Adds an index document for the given blog.
     *
     * @param blog blog to index
     * @throws Exception if the document cannot be built or written
     */
    public void addIndex(Blog blog) throws Exception {
        // Build the document first so a bad blog never opens (and locks) the index.
        Document doc = toDocument(blog);
        try (Directory dir = openDirectory();
                SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
                IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
            writer.addDocument(doc);
        }
    }

    /**
     * Removes the index document of the given blog.
     *
     * @param blogId id of the blog, as stored in the {@code id} field
     * @throws Exception if the index cannot be opened or written
     */
    public void deleteIndex(String blogId) throws Exception {
        try (Directory dir = openDirectory();
                SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
                IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
            writer.deleteDocuments(new Term("id", blogId));
            // Merge away segments with deletions so the document is physically removed.
            writer.forceMergeDeletes();
            writer.commit();
        }
    }

    /**
     * Replaces the index document of the given blog, matched by its id.
     *
     * @param blog blog whose index document is replaced
     * @throws Exception if the document cannot be built or written
     */
    public void updateIndex(Blog blog) throws Exception {
        Document doc = toDocument(blog);
        try (Directory dir = openDirectory();
                SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
                IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
            writer.updateDocument(new Term("id", String.valueOf(blog.getId())), doc);
        }
    }

    /**
     * Searches blogs whose title or content matches the query string.
     *
     * <p>At most {@value #MAX_HITS} hits are returned. In each returned blog,
     * the title and content hold the best-matching fragment with matches
     * wrapped in highlight markup. When no fragment was produced, the title is
     * returned as stored and the content is cut to its first
     * {@value #FALLBACK_CONTENT_LENGTH} characters.
     *
     * @param queryStr search keywords, parsed with the classic query parser
     * @return matching blogs in hit order; empty when there are no hits or no
     *         index exists yet
     * @throws Exception if the query cannot be parsed or the index cannot be read
     */
    public List<Blog> searchBlog(String queryStr) throws Exception {
        try (Directory dir = openDirectory()) {
            if (!DirectoryReader.indexExists(dir)) {
                // Nothing has been indexed yet: no results rather than an error.
                return new ArrayList<Blog>();
            }
            try (IndexReader reader = DirectoryReader.open(dir);
                    SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer()) {
                IndexSearcher indexSearcher = new IndexSearcher(reader);

                // The same keywords are parsed once per field; a hit in either one matches.
                Query titleQuery = new QueryParser("title", analyzer).parse(queryStr);
                Query contentQuery = new QueryParser("content", analyzer).parse(queryStr);
                BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
                booleanQuery.add(titleQuery, BooleanClause.Occur.SHOULD);
                booleanQuery.add(contentQuery, BooleanClause.Occur.SHOULD);

                TopDocs hits = indexSearcher.search(booleanQuery.build(), MAX_HITS);

                // The scorer is created without a field restriction, so the terms
                // parsed for the title query are used to highlight both fields.
                QueryScorer scorer = new QueryScorer(titleQuery);
                Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
                SimpleHTMLFormatter simpleHTMLFormatter =
                        new SimpleHTMLFormatter("<b><font color='blue'>", "</font></b>");
                Highlighter highlighter = new Highlighter(simpleHTMLFormatter, scorer);
                highlighter.setTextFragmenter(fragmenter);

                List<Blog> blogList = new ArrayList<Blog>(hits.scoreDocs.length);
                for (ScoreDoc scoreDoc : hits.scoreDocs) {
                    blogList.add(toBlog(indexSearcher.doc(scoreDoc.doc), highlighter, analyzer));
                }
                return blogList;
            }
        }
    }

    /** Opens the file-system directory that holds the index. */
    private Directory openDirectory() throws Exception {
        return FSDirectory.open(Paths.get(indexDir));
    }

    /**
     * Builds the index document for a blog. The {@code content} field stores
     * the blog's tag-free content ({@code contentNoTag}). A null title or
     * content is left out of the document.
     */
    private static Document toDocument(Blog blog) throws Exception {
        Document doc = new Document();
        doc.add(new StringField("id", String.valueOf(blog.getId()), Field.Store.YES));
        if (blog.getTitle() != null) {
            doc.add(new TextField("title", blog.getTitle(), Field.Store.YES));
        }
        doc.add(new StringField("releaseDate",
                DateUtil.formatDate(blog.getReleaseDate(), DATE_PATTERN), Field.Store.YES));
        if (blog.getContentNoTag() != null) {
            doc.add(new TextField("content", blog.getContentNoTag(), Field.Store.YES));
        }
        return doc;
    }

    /** Converts a stored index document into a blog with highlighted title and content. */
    private static Blog toBlog(Document doc, Highlighter highlighter, SmartChineseAnalyzer analyzer)
            throws Exception {
        Blog blog = new Blog();
        blog.setId(Integer.parseInt(doc.get("id")));
        blog.setReleaseDate(DateUtil.formatString(doc.get("releaseDate"), DATE_PATTERN));

        String title = doc.get("title");
        // NOTE(review): title and content are placed into the highlight markup
        // without HTML escaping — confirm the view escapes or sanitizes them.
        String content = doc.get("content");
        System.out.println("索引库存储的content:" + content);

        if (title != null) {
            String hTitle = highlight(highlighter, analyzer, "title", title);
            // No highlighted fragment: fall back to the full stored title.
            blog.setTitle(StringUtil.isEmpty(hTitle) ? title : hTitle);
        }

        if (content != null) {
            String hContent = highlight(highlighter, analyzer, "content", content);
            if (StringUtil.isEmpty(hContent)) {
                // No highlighted fragment: fall back to the leading part of the content.
                blog.setContent(content.length() <= FALLBACK_CONTENT_LENGTH
                        ? content
                        : content.substring(0, FALLBACK_CONTENT_LENGTH));
            } else {
                blog.setContent(hContent);
            }
        }
        return blog;
    }

    /** Returns the best highlighted fragment of {@code text} for the given field. */
    private static String highlight(Highlighter highlighter, SmartChineseAnalyzer analyzer,
            String field, String text) throws Exception {
        TokenStream tokenStream = analyzer.tokenStream(field, new StringReader(text));
        return highlighter.getBestFragment(tokenStream, text);
    }
}
列表
添加博客
搜索
---