1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.text.translate;
18
19 import java.io.IOException;
20 import java.io.Writer;
21 import java.util.HashMap;
22 import java.util.HashSet;
23
24 /**
25 * Translates a value using a lookup table.
26 * But doesn't translate if that value is already translated.
27 *
28 * @since 1.0
29 */
30 public class SingleLookupTranslator extends CharSequenceTranslator {
31
32 private final HashMap<String, String> lookupMap;
33 private final HashSet<Character> prefixSet;
34 private final int shortest;
35 private final int longest;
36 private final int shortestValue;
37 private final int longestValue;
38
39 /**
40 * Define the look tables to be used in translation.
41 *
42 * Note that, as of Lang 3.1, the key to the lookup table is converted to a
43 * java.lang.String. This is because we need the key to support hashCode and
44 * equals(Object), allowing it to be the key for a HashMap. See LANG-882.
45 *
46 * Also note that, multiple lookup tables should be passed to this translator
47 * instead of passing multiple instances of this translator to the
48 * AggregateTranslator. Because, this translator only checks the values of the
49 * lookup table passed to this instance while deciding whether a value is
50 * already translated or not.
51 *
52 * @param inputArrays, an array of string arrays.
53 */
54 public SingleLookupTranslator(final String[][]... inputArrays) {
55 String[][] lookup = new String[0][];
56 for (String[][] input : inputArrays) {
57 lookup = append(lookup, input);
58 }
59 lookupMap = new HashMap<String, String>();
60 prefixSet = new HashSet<Character>();
61 int _shortest = Integer.MAX_VALUE;
62 int _longest = 0;
63 int _shortestValue = Integer.MAX_VALUE;
64 int _longestValue = 0;
65 if (lookup != null) {
66 for (final CharSequence[] seq : lookup) {
67 this.lookupMap.put(seq[0].toString(), seq[1].toString());
68 this.prefixSet.add(seq[0].charAt(0));
69 final int sz = seq[0].length();
70 if (sz < _shortest) {
71 _shortest = sz;
72 }
73 if (sz > _longest) {
74 _longest = sz;
75 }
76 final int sizeOfValue = seq[1].length();
77 if (sizeOfValue < _shortestValue) {
78 _shortestValue = sizeOfValue;
79 }
80 if (sizeOfValue > _longestValue) {
81 _longestValue = sizeOfValue;
82 }
83 }
84 }
85 shortest = _shortest;
86 longest = _longest;
87 shortestValue = _shortestValue;
88 longestValue = _longestValue;
89 }
90
91 private static String[][] append(String[][] a, String[][] b) {
92 String[][] result = new String[a.length + b.length][];
93 System.arraycopy(a, 0, result, 0, a.length);
94 System.arraycopy(b, 0, result, a.length, b.length);
95 return result;
96 }
97
98 /**
99 * Translate a set of codepoints, represented by an int index into a CharSequence,
100 * into another set of codepoints. The number of codepoints consumed must be returned,
101 * and the only IOExceptions thrown must be from interacting with the Writer so that
102 * the top level API may reliably ignore StringWriter IOExceptions.
103 *
104 * @param input CharSequence that is being translated
105 * @param index int representing the current point of translation
106 * @param out Writer to translate the text to
107 * @return int count of codepoints consumed
108 * @throws IOException if and only if the Writer produces an IOException
109 */
110 @Override
111 public int translate(CharSequence input, int index, Writer out) throws IOException {
112 // check if already translated
113 int maxValue = longestValue;
114 if (index + maxValue > input.length()) {
115 maxValue = input.length() - index;
116 }
117 // implement greedy algorithm to check all the possible 'value' matches for which we need to skip translation.
118 for (int i = maxValue; i >= shortestValue; i--) {
119 final CharSequence subSeq = input.subSequence(index, index + i);
120 // If the sub-string is already translated, return without translating.
121 if (lookupMap.containsValue(subSeq.toString())) {
122 return 0;
123 }
124 }
125
126 // check if translation exists for the input at position index
127 if (prefixSet.contains(input.charAt(index))) {
128 int max = longest;
129 if (index + longest > input.length()) {
130 max = input.length() - index;
131 }
132 // implement greedy algorithm by trying maximum match first
133 for (int i = max; i >= shortest; i--) {
134 final CharSequence subSeq = input.subSequence(index, index + i);
135 final String result = lookupMap.get(subSeq.toString());
136
137 if (result != null) {
138 out.write(result);
139 return i;
140 }
141 }
142 }
143 return 0;
144 }
145 }