作者 crossoverJie

:sparkles: Introducing new features: 字典树 (trie / prefix tree).

1 package com.crossoverjie.cim.common.data.construct; 1 package com.crossoverjie.cim.common.data.construct;
2 2
  3 +import com.crossoverjie.cim.common.util.StringUtil;
  4 +
  5 +import java.util.ArrayList;
  6 +import java.util.List;
  7 +
3 /** 8 /**
4 - * Function: 9 + * Function:字典树字符前缀模糊匹配
5 * 10 *
6 * @author crossoverJie 11 * @author crossoverJie
7 * Date: 2019/1/7 18:58 12 * Date: 2019/1/7 18:58
@@ -9,33 +14,198 @@ package com.crossoverjie.cim.common.data.construct; @@ -9,33 +14,198 @@ package com.crossoverjie.cim.common.data.construct;
9 */ 14 */
public class TrieTree {

    // Trie (prefix tree) over the 52 ASCII letters, case-sensitive.
    // Each node owns one child slot per letter: slots 0-25 hold 'A'-'Z',
    // slots 26-51 hold 'a'-'z'. Characters outside those ranges are not
    // representable and are skipped on insert / never match on lookup.

    /** Number of letters in one case of the ASCII alphabet. */
    private static final int ALPHABET_SIZE = 26;

    /** Child slots per node: upper-case letters followed by lower-case letters. */
    private static final int CHILDREN_LENGTH = ALPHABET_SIZE * 2;

    /** Initial capacity of the buffer used while walking the tree; words may be longer. */
    private static final int MAX_CHAR_LENGTH = 16;

    private final Node root;

    public TrieTree() {
        root = new Node();
    }

    /**
     * Adds a word to the tree.
     *
     * <p>Only ASCII letters are stored; any other character in {@code data} is skipped,
     * so {@code "ab1"} is stored as {@code "ab"}. A {@code null} or empty argument, or one
     * that contains no ASCII letter at all, leaves the tree unchanged.
     *
     * @param data the word to store, may be {@code null}
     */
    public void insert(String data) {
        if (data == null) {
            return;
        }
        this.insert(this.root, data);
    }

    /**
     * Walks down from {@code root}, creating missing nodes, and marks the node of the
     * last stored letter as the end of a word.
     */
    private void insert(Node root, String data) {
        Node current = root;
        for (int i = 0; i < data.length(); i++) {
            char letter = data.charAt(i);
            int slot = slotOf(letter);
            if (slot < 0) {
                // Unsupported character: skip it and keep going with the rest of the word.
                continue;
            }
            Node next = current.children[slot];
            if (next == null) {
                next = new Node();
                next.data = letter;
                current.children[slot] = next;
            }
            current = next;
        }
        // Mark the end on the last node actually reached, so a trailing unsupported
        // character cannot cause the word to be lost.
        if (current != root) {
            current.isEnd = true;
        }
    }

    /**
     * Returns every stored word that starts with {@code key} (including {@code key} itself
     * when it is a stored word). Results are ordered by child slot at each level, i.e.
     * upper-case letters before lower-case letters.
     *
     * @param key the prefix to look for, may be {@code null}
     * @return the matching words; empty when {@code key} is {@code null}, empty, contains a
     *         character other than an ASCII letter, or matches nothing
     */
    public List<String> prefixSearch(String key) {
        List<String> value = new ArrayList<String>();
        if (key == null || key.isEmpty()) {
            return value;
        }

        // Follow the prefix letter by letter; bail out as soon as the path breaks.
        Node node = root;
        for (int i = 0; i < key.length(); i++) {
            int slot = slotOf(key.charAt(i));
            if (slot < 0) {
                return value;
            }
            node = node.children[slot];
            if (node == null) {
                return value;
            }
        }

        if (node.isEnd) {
            value.add(key);
        }
        StringBuilder prefix = new StringBuilder(Math.max(MAX_CHAR_LENGTH, key.length()));
        prefix.append(key);
        collectDescendants(node, prefix, value);
        return value;
    }

    /**
     * Returns every word stored in the tree, ordered by child slot at each level.
     *
     * @return all stored words; empty when nothing has been inserted
     */
    public List<String> all() {
        List<String> value = new ArrayList<String>();
        collectDescendants(this.root, new StringBuilder(MAX_CHAR_LENGTH), value);
        return value;
    }

    /**
     * Appends to {@code list} every word found strictly below {@code node}. Each word is
     * prefixed with the first {@code index} characters of {@code chars}, which are taken
     * to be the path that leads to {@code node}. {@code chars} is only read, never written.
     *
     * @param node  the node to start from; {@code null} yields no words
     * @param list  the list that receives the words
     * @param chars the characters on the path to {@code node}, may be {@code null}
     * @param index the number of leading characters of {@code chars} that form the path
     * @return {@code list}
     */
    public List<String> depth(Node node, List<String> list, char[] chars, int index) {
        if (node == null) {
            return list;
        }
        StringBuilder prefix = new StringBuilder(MAX_CHAR_LENGTH);
        if (chars != null && index > 0) {
            prefix.append(chars, 0, Math.min(index, chars.length));
        }
        collectDescendants(node, prefix, list);
        return list;
    }

    /**
     * Depth-first walk that adds every word below {@code node} to {@code out}.
     * {@code prefix} holds the path to {@code node} and is restored before returning.
     */
    private void collectDescendants(Node node, StringBuilder prefix, List<String> out) {
        for (Node child : node.children) {
            if (child == null) {
                continue;
            }
            prefix.append(child.data);
            if (child.isEnd) {
                out.add(prefix.toString());
            }
            // Keep descending even after a word end: longer words may share this prefix.
            collectDescendants(child, prefix, out);
            prefix.setLength(prefix.length() - 1);
        }
    }

    /**
     * Maps a character to its child slot: 0-25 for 'A'-'Z', 26-51 for 'a'-'z'.
     *
     * @return the slot, or -1 when the character is not an ASCII letter
     */
    private static int slotOf(char c) {
        if (c >= 'A' && c <= 'Z') {
            return c - 'A';
        }
        if (c >= 'a' && c <= 'z') {
            return c - 'a' + ALPHABET_SIZE;
        }
        return -1;
    }

    /** One letter of the tree. Static: it needs no reference to the enclosing tree. */
    private static class Node {
        /** True when the path from the root to this node spells a stored word. */
        public boolean isEnd = false;

        /** The letter this node represents. */
        public char data;

        /** Child nodes, indexed by the slot of their letter. */
        public Node[] children = new Node[CHILDREN_LENGTH];
    }
}
  1 +package com.crossoverjie.cim.common.data.construct;
  2 +
  3 +import org.junit.Assert;
  4 +import org.junit.Test;
  5 +
  6 +import java.util.List;
  7 +
  8 +public class TrieTreeTest {
  9 + @Test
  10 + public void insert() throws Exception {
  11 + TrieTree trieTree = new TrieTree();
  12 + trieTree.insert("abc");
  13 + trieTree.insert("abcd");
  14 + }
  15 +
  16 +
  17 + @Test
  18 + public void all() throws Exception {
  19 + TrieTree trieTree = new TrieTree();
  20 + trieTree.insert("ABC");
  21 + trieTree.insert("abC");
  22 + List<String> all = trieTree.all();
  23 + for (String s : all) {
  24 + System.out.println(s);
  25 + }
  26 +
  27 + }
  28 +
  29 + @Test
  30 + public void prefixSearch() throws Exception {
  31 + TrieTree trieTree = new TrieTree();
  32 + trieTree.insert("abc");
  33 + trieTree.insert("abd");
  34 + trieTree.insert("ABe");
  35 +
  36 + List<String> ab = trieTree.prefixSearch("AB");
  37 + for (String s : ab) {
  38 + System.out.println(s);
  39 + }
  40 +
  41 + System.out.println("========");
  42 +
  43 + //char[] chars = new char[3] ;
  44 + //for (int i = 0; i < 3; i++) {
  45 + // int a = 97 + i ;
  46 + // chars[i] = (char) a ;
  47 + //}
  48 + //
  49 + //String s = String.valueOf(chars);
  50 + //System.out.println(s);
  51 + }
  52 +
  53 + @Test
  54 + public void prefixSearch2() throws Exception {
  55 + TrieTree trieTree = new TrieTree();
  56 + trieTree.insert("Cde");
  57 + trieTree.insert("CDa");
  58 + trieTree.insert("ABe");
  59 +
  60 + List<String> ab = trieTree.prefixSearch("AC");
  61 + for (String s : ab) {
  62 + System.out.println(s);
  63 + }
  64 + Assert.assertTrue(ab.size() == 0);
  65 + }
  66 +
  67 + @Test
  68 + public void prefixSearch3() throws Exception {
  69 + TrieTree trieTree = new TrieTree();
  70 + trieTree.insert("Cde");
  71 + trieTree.insert("CDa");
  72 + trieTree.insert("ABe");
  73 +
  74 + List<String> ab = trieTree.prefixSearch("CD");
  75 + for (String s : ab) {
  76 + System.out.println(s);
  77 + }
  78 + Assert.assertTrue(ab.size() == 1);
  79 + }
  80 +
  81 + @Test
  82 + public void prefixSearch4() throws Exception {
  83 + TrieTree trieTree = new TrieTree();
  84 + trieTree.insert("Cde");
  85 + trieTree.insert("CDa");
  86 + trieTree.insert("ABe");
  87 +
  88 + List<String> ab = trieTree.prefixSearch("Cd");
  89 + String result = "";
  90 + for (String s : ab) {
  91 + result += s + "," ;
  92 + System.out.println(s);
  93 + }
  94 + Assert.assertTrue(result.equals("Cde,"));
  95 + }
  96 +
  97 + @Test
  98 + public void prefixSearch5() throws Exception {
  99 + TrieTree trieTree = new TrieTree();
  100 + trieTree.insert("Cde");
  101 + trieTree.insert("CDa");
  102 + trieTree.insert("ABe");
  103 + trieTree.insert("CDfff");
  104 + trieTree.insert("Cdfff");
  105 +
  106 + List<String> ab = trieTree.prefixSearch("Cd");
  107 + String result = "";
  108 + for (String s : ab) {
  109 + result += s + "," ;
  110 + System.out.println(s);
  111 + }
  112 + Assert.assertTrue(result.equals("Cde,Cdfff,"));
  113 + }
  114 +
  115 + @Test
  116 + public void prefixSearch6() throws Exception {
  117 + TrieTree trieTree = new TrieTree();
  118 + trieTree.insert("Cde");
  119 + trieTree.insert("CDa");
  120 + trieTree.insert("ABe");
  121 + trieTree.insert("CDfff");
  122 + trieTree.insert("Cdfff");
  123 +
  124 + List<String> ab = trieTree.prefixSearch("CD");
  125 + String result = "";
  126 + for (String s : ab) {
  127 + result += s + "," ;
  128 + System.out.println(s);
  129 + }
  130 + Assert.assertTrue(result.equals("CDa,CDfff,"));
  131 + }
  132 +}