LookupTranslator.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.text.translate;

  18. import java.io.IOException;
  19. import java.io.Writer;
  20. import java.security.InvalidParameterException;
  21. import java.util.BitSet;
  22. import java.util.HashMap;
  23. import java.util.Map;

  24. /**
  25.  * Translates a value using a lookup table.
  26.  *
  27.  * @since 1.0
  28.  */
  29. public class LookupTranslator extends CharSequenceTranslator {

  30.     /** The mapping to be used in translation. */
  31.     private final Map<String, String> lookupMap;

  32.     /** The first character of each key in the lookupMap. */
  33.     private final BitSet prefixSet;

  34.     /** The length of the shortest key in the lookupMap. */
  35.     private final int shortest;

  36.     /** The length of the longest key in the lookupMap. */
  37.     private final int longest;

  38.     /**
  39.      * Constructs the lookup table to be used in translation
  40.      *
  41.      * Note that, as of Lang 3.1 (the origin of this code), the key to the lookup
  42.      * table is converted to a java.lang.String. This is because we need the key
  43.      * to support hashCode and equals(Object), allowing it to be the key for a
  44.      * HashMap. See LANG-882.
  45.      *
  46.      * @param lookupMap Map&lt;CharSequence, CharSequence&gt; table of translator
  47.      *                  mappings
  48.      */
  49.     public LookupTranslator(final Map<CharSequence, CharSequence> lookupMap) {
  50.         if (lookupMap == null) {
  51.             throw new InvalidParameterException("lookupMap cannot be null");
  52.         }
  53.         this.lookupMap = new HashMap<>();
  54.         this.prefixSet = new BitSet();
  55.         int currentShortest = Integer.MAX_VALUE;
  56.         int currentLongest = 0;

  57.         for (final Map.Entry<CharSequence, CharSequence> pair : lookupMap.entrySet()) {
  58.             this.lookupMap.put(pair.getKey().toString(), pair.getValue().toString());
  59.             this.prefixSet.set(pair.getKey().charAt(0));
  60.             final int sz = pair.getKey().length();
  61.             if (sz < currentShortest) {
  62.                 currentShortest = sz;
  63.             }
  64.             if (sz > currentLongest) {
  65.                 currentLongest = sz;
  66.             }
  67.         }
  68.         this.shortest = currentShortest;
  69.         this.longest = currentLongest;
  70.     }

  71.     /**
  72.      * {@inheritDoc}
  73.      */
  74.     @Override
  75.     public int translate(final CharSequence input, final int index, final Writer writer) throws IOException {
  76.         // check if translation exists for the input at position index
  77.         if (prefixSet.get(input.charAt(index))) {
  78.             int max = longest;
  79.             if (index + longest > input.length()) {
  80.                 max = input.length() - index;
  81.             }
  82.             // implement greedy algorithm by trying maximum match first
  83.             for (int i = max; i >= shortest; i--) {
  84.                 final CharSequence subSeq = input.subSequence(index, index + i);
  85.                 final String result = lookupMap.get(subSeq.toString());

  86.                 if (result != null) {
  87.                     writer.write(result);
  88.                     return Character.codePointCount(subSeq, 0, subSeq.length());
  89.                 }
  90.             }
  91.         }
  92.         return 0;
  93.     }
  94. }