View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.text.translate;
18  
19  import java.io.IOException;
20  import java.io.Writer;
21  import java.util.HashMap;
22  import java.util.HashSet;
23  
24  /**
25   * Translates a value using a lookup table.
26   * But doesn't translate if that value is already translated.
27   *
28   * @since 1.0
29   */
30  public class SingleLookupTranslator extends CharSequenceTranslator {
31  
32      private final HashMap<String, String> lookupMap;
33      private final HashSet<Character>      prefixSet;
34      private final int                     shortest;
35      private final int                     longest;
36      private final int                     shortestValue;
37      private final int                     longestValue;
38  
39      /**
40       * Define the look tables to be used in translation.
41       *
42       * Note that, as of Lang 3.1, the key to the lookup table is converted to a
43       * java.lang.String. This is because we need the key to support hashCode and
44       * equals(Object), allowing it to be the key for a HashMap. See LANG-882.
45       *
46       * Also note that, multiple lookup tables should be passed to this translator
47       * instead of passing multiple instances of this translator to the
48       * AggregateTranslator. Because, this translator only checks the values of the
49       * lookup table passed to this instance while deciding whether a value is
50       * already translated or not.
51       *
52       * @param inputArrays, an array of string arrays.
53       */
54      public SingleLookupTranslator(final String[][]... inputArrays) {
55          String[][] lookup = new String[0][];
56          for (String[][] input : inputArrays) {
57              lookup = append(lookup, input);
58          }
59          lookupMap = new HashMap<String, String>();
60          prefixSet = new HashSet<Character>();
61          int _shortest = Integer.MAX_VALUE;
62          int _longest = 0;
63          int _shortestValue = Integer.MAX_VALUE;
64          int _longestValue = 0;
65          if (lookup != null) {
66              for (final CharSequence[] seq : lookup) {
67                  this.lookupMap.put(seq[0].toString(), seq[1].toString());
68                  this.prefixSet.add(seq[0].charAt(0));
69                  final int sz = seq[0].length();
70                  if (sz < _shortest) {
71                      _shortest = sz;
72                  }
73                  if (sz > _longest) {
74                      _longest = sz;
75                  }
76                  final int sizeOfValue = seq[1].length();
77                  if (sizeOfValue < _shortestValue) {
78                      _shortestValue = sizeOfValue;
79                  }
80                  if (sizeOfValue > _longestValue) {
81                      _longestValue = sizeOfValue;
82                  }
83              }
84          }
85          shortest = _shortest;
86          longest = _longest;
87          shortestValue = _shortestValue;
88          longestValue = _longestValue;
89      }
90  
91      private static String[][] append(String[][] a, String[][] b) {
92          String[][] result = new String[a.length + b.length][];
93          System.arraycopy(a, 0, result, 0, a.length);
94          System.arraycopy(b, 0, result, a.length, b.length);
95          return result;
96      }
97  
98      /**
99       * Translate a set of codepoints, represented by an int index into a CharSequence,
100      * into another set of codepoints. The number of codepoints consumed must be returned,
101      * and the only IOExceptions thrown must be from interacting with the Writer so that
102      * the top level API may reliably ignore StringWriter IOExceptions.
103      *
104      * @param input CharSequence that is being translated
105      * @param index int representing the current point of translation
106      * @param out   Writer to translate the text to
107      * @return int count of codepoints consumed
108      * @throws IOException if and only if the Writer produces an IOException
109      */
110     @Override
111     public int translate(CharSequence input, int index, Writer out) throws IOException {
112         // check if already translated
113         int maxValue = longestValue;
114         if (index + maxValue > input.length()) {
115             maxValue = input.length() - index;
116         }
117         // implement greedy algorithm to check all the possible 'value' matches for which we need to skip translation.
118         for (int i = maxValue; i >= shortestValue; i--) {
119             final CharSequence subSeq = input.subSequence(index, index + i);
120             // If the sub-string is already translated, return without translating.
121             if (lookupMap.containsValue(subSeq.toString())) {
122                 return 0;
123             }
124         }
125 
126         // check if translation exists for the input at position index
127         if (prefixSet.contains(input.charAt(index))) {
128             int max = longest;
129             if (index + longest > input.length()) {
130                 max = input.length() - index;
131             }
132             // implement greedy algorithm by trying maximum match first
133             for (int i = max; i >= shortest; i--) {
134                 final CharSequence subSeq = input.subSequence(index, index + i);
135                 final String result = lookupMap.get(subSeq.toString());
136 
137                 if (result != null) {
138                     out.write(result);
139                     return i;
140                 }
141             }
142         }
143         return 0;
144     }
145 }