trie树-AC自动机
trie树--AC自动机
package com.chipmunk.algorithm.trie; public class Branch { private char word; private byte status = 0;//0词语未结束1词语结束 private Branch[] branches = null; public Branch(char word) { super(); this.word = word; } public Branch(char word, byte status) { super(); this.word = word; this.status = status; } /** * * @param b * @return */ public Branch add(Branch b){ if (branches==null) { branches=new Branch[1]; branches[0] = b; return branches[0]; }else { char w1=b.getWord(); //判断是否已经有相等的字 int i = -1; for (int j = 0; j < branches.length; j++) { Branch bb =branches[j]; char w2 = bb.getWord(); if (w1==w2) { i=j; //如果原来这个字不是结束符,更改为结束符 if (b.getStatus()==1&&bb.getStatus()==0) { bb.setStatus(b.getStatus()); } break; } } if (i>-1) {//字已经添加过了 return branches[i]; }else {//添加一个字 Branch[]branches2=new Branch[branches.length+1]; System.arraycopy(branches,0,branches2,0,branches.length); branches2[branches2.length-1]=b; branches=branches2; return branches[branches.length-1]; } } } public char getWord() { return word; } public void setWord(char word) { this.word = word; } public byte getStatus() { return status; } public void setStatus(byte status) { this.status = status; } public Branch[] getBranches() { return branches; } public void setBranches(Branch[] branches) { this.branches = branches; } }
package com.chipmunk.algorithm.trie; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.List; public class TrieTree { /** * 向trie树添加新词 * @param root * @param text */ private static void addWords(Branch root,String text){ Branch branch_grow = root; char[] chars = text.toCharArray(); for (int i = 0; i < chars.length; i++) { byte status = 0; if (i == chars.length-1) { status = 1; } Branch b = branch_grow.add(new Branch(chars[i], status)); branch_grow=b; } } /** * 搜索一个词---全匹配 * @param root 树根 * @param text 搜索词 * @return */ private static boolean searchTerm(Branch root,String text){ char[]cs = text.toCharArray(); Branch finder = root; for (int i = 0; i < cs.length; i++) { char c = cs[i]; boolean bool_equal = false; boolean bool_last = i==cs.length-1;//最后一个字 Branch[]bs = finder.getBranches(); if (bs!=null) { for (Branch branch : bs) { char w = branch.getWord(); if (c==w) { finder=branch; if (bool_last) { byte status = branch.getStatus(); if (status==0) {//是否是词语最后一个字 bool_equal = false; }else { bool_equal = true; } }else { bool_equal = true; } break; } } } if (!bool_equal) { return false; } } return true; } /** * 搜索一个词---部分匹配 * @param root 树根 * @param text 搜索词 * @return */ public static List<String> searchTermPart(Branch root,String text){ char[]cs = text.toCharArray(); List<String> list = new ArrayList<String>(); StringBuffer text_temp = new StringBuffer(); Branch finder = root; for (int i = 0; i < cs.length; i++) { char c = cs[i]; boolean bool_equal = false;//是否相等 Branch[]bs = finder.getBranches(); if (bs!=null) { for (Branch branch : bs) { char w = branch.getWord(); if (c==w) { finder=branch; byte status = branch.getStatus(); text_temp.append(c); // System.out.println(status+"--"+c); if (status==1) {//词语结尾标示 list.add(text_temp.toString()); } bool_equal = true; break; } } } if (!bool_equal&&text_temp.length()>0) { return list; } } return list; } public static List<String> searchAllTerms(Branch root,String text){ char[]cs = text.toCharArray(); List<String> list = new ArrayList<String>(); StringBuffer text_temp = new StringBuffer(); Branch finder = root; for (int i = 0; i < cs.length; i++) { // System.out.println(i); char c = cs[i]; boolean bool_equal = false;//是否相等 Branch[]bs = finder.getBranches(); if (bs!=null) { for (Branch branch : bs) { char w = branch.getWord(); if (c==w) { finder=branch; byte status = branch.getStatus(); text_temp.append(c); // System.out.println(status+"--"+c+"--"+i); if (status==1) { list.add(text_temp.toString()); } bool_equal = true; break; } } } //到了不等的文字节点 if (!bool_equal&&text_temp.length()>0) { i=i-text_temp.length(); text_temp=new StringBuffer();//临时匹配文字组归零 finder=root;//文字查找器回trie树根部 } } return list; } /** * 种树--建立trie树 * @param words * @return */ public static Branch plantTree(Collection<String> words){ Branch root = new Branch(' '); for (String w : words) { addWords(root, w); } return root; } /** * 打印所有树枝字 * @param tree */ public static void print(Branch tree){ Branch[] bs = tree.getBranches(); if (bs!=null) { for (Branch branch : bs) { System.out.print(branch.getWord()); if (branch.getStatus()==1) { System.out.println(); } print(branch); } } } public static void main(String[] args) { List<String> words = new ArrayList<String>(); words.add("中国人"); words.add("中华民族"); words.add("中华"); words.add("大中华区"); words.add("中华人民共和国"); words.add("中华民国"); words.add("华人"); words.add("国歌"); Branch tree = plantTree(words); System.out.println(searchTerm(tree, "中华人民共和国")); System.out.println(searchAllTerms(tree, "唱中华人民共和国国歌是")); // 01234 56789 10 // print(tree); } }