001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.text.translate;
018
019import java.io.IOException;
020import java.io.Writer;
021import java.util.HashMap;
022import java.util.HashSet;
023
024/**
025 * Translates a value using a lookup table.
026 * But doesn't translate if that value is already translated.
027 *
028 * @since 1.0
029 */
030public class SingleLookupTranslator extends CharSequenceTranslator {
031
032    private final HashMap<String, String> lookupMap;
033    private final HashSet<Character>      prefixSet;
034    private final int                     shortest;
035    private final int                     longest;
036    private final int                     shortestValue;
037    private final int                     longestValue;
038
039    /**
040     * Define the look tables to be used in translation.
041     *
042     * Note that, as of Lang 3.1, the key to the lookup table is converted to a
043     * java.lang.String. This is because we need the key to support hashCode and
044     * equals(Object), allowing it to be the key for a HashMap. See LANG-882.
045     *
046     * Also note that, multiple lookup tables should be passed to this translator
047     * instead of passing multiple instances of this translator to the
048     * AggregateTranslator. Because, this translator only checks the values of the
049     * lookup table passed to this instance while deciding whether a value is
050     * already translated or not.
051     *
052     * @param inputArrays, an array of string arrays.
053     */
054    public SingleLookupTranslator(final String[][]... inputArrays) {
055        String[][] lookup = new String[0][];
056        for (String[][] input : inputArrays) {
057            lookup = append(lookup, input);
058        }
059        lookupMap = new HashMap<String, String>();
060        prefixSet = new HashSet<Character>();
061        int _shortest = Integer.MAX_VALUE;
062        int _longest = 0;
063        int _shortestValue = Integer.MAX_VALUE;
064        int _longestValue = 0;
065        if (lookup != null) {
066            for (final CharSequence[] seq : lookup) {
067                this.lookupMap.put(seq[0].toString(), seq[1].toString());
068                this.prefixSet.add(seq[0].charAt(0));
069                final int sz = seq[0].length();
070                if (sz < _shortest) {
071                    _shortest = sz;
072                }
073                if (sz > _longest) {
074                    _longest = sz;
075                }
076                final int sizeOfValue = seq[1].length();
077                if (sizeOfValue < _shortestValue) {
078                    _shortestValue = sizeOfValue;
079                }
080                if (sizeOfValue > _longestValue) {
081                    _longestValue = sizeOfValue;
082                }
083            }
084        }
085        shortest = _shortest;
086        longest = _longest;
087        shortestValue = _shortestValue;
088        longestValue = _longestValue;
089    }
090
091    private static String[][] append(String[][] a, String[][] b) {
092        String[][] result = new String[a.length + b.length][];
093        System.arraycopy(a, 0, result, 0, a.length);
094        System.arraycopy(b, 0, result, a.length, b.length);
095        return result;
096    }
097
098    /**
099     * Translate a set of codepoints, represented by an int index into a CharSequence,
100     * into another set of codepoints. The number of codepoints consumed must be returned,
101     * and the only IOExceptions thrown must be from interacting with the Writer so that
102     * the top level API may reliably ignore StringWriter IOExceptions.
103     *
104     * @param input CharSequence that is being translated
105     * @param index int representing the current point of translation
106     * @param out   Writer to translate the text to
107     * @return int count of codepoints consumed
108     * @throws IOException if and only if the Writer produces an IOException
109     */
110    @Override
111    public int translate(CharSequence input, int index, Writer out) throws IOException {
112        // check if already translated
113        int maxValue = longestValue;
114        if (index + maxValue > input.length()) {
115            maxValue = input.length() - index;
116        }
117        // implement greedy algorithm to check all the possible 'value' matches for which we need to skip translation.
118        for (int i = maxValue; i >= shortestValue; i--) {
119            final CharSequence subSeq = input.subSequence(index, index + i);
120            // If the sub-string is already translated, return without translating.
121            if (lookupMap.containsValue(subSeq.toString())) {
122                return 0;
123            }
124        }
125
126        // check if translation exists for the input at position index
127        if (prefixSet.contains(input.charAt(index))) {
128            int max = longest;
129            if (index + longest > input.length()) {
130                max = input.length() - index;
131            }
132            // implement greedy algorithm by trying maximum match first
133            for (int i = max; i >= shortest; i--) {
134                final CharSequence subSeq = input.subSequence(index, index + i);
135                final String result = lookupMap.get(subSeq.toString());
136
137                if (result != null) {
138                    out.write(result);
139                    return i;
140                }
141            }
142        }
143        return 0;
144    }
145}