001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 * 
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 * 
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.text.translate;
018
019import java.io.IOException;
020import java.io.Writer;
021import java.util.HashMap;
022import java.util.HashSet;
023
024/**
025 * Translates a value using a lookup table.
026 *
027 * @since 1.0
028 */
029public class LookupTranslator extends CharSequenceTranslator {
030
031    private final HashMap<String, String> lookupMap;
032    private final HashSet<Character> prefixSet;
033    private final int shortest;
034    private final int longest;
035
036    /**
037     * Define the lookup table to be used in translation
038     *
039     * Note that, as of Lang 3.1, the key to the lookup table is converted to a
040     * java.lang.String. This is because we need the key to support hashCode and
041     * equals(Object), allowing it to be the key for a HashMap. See LANG-882.
042     *
043     * @param lookup CharSequence[][] table of size [*][2]
044     */
045    public LookupTranslator(final CharSequence[]... lookup) {
046        lookupMap = new HashMap<>();
047        prefixSet = new HashSet<>();
048        int _shortest = Integer.MAX_VALUE;
049        int _longest = 0;
050        if (lookup != null) {
051            for (final CharSequence[] seq : lookup) {
052                this.lookupMap.put(seq[0].toString(), seq[1].toString());
053                this.prefixSet.add(seq[0].charAt(0));
054                final int sz = seq[0].length();
055                if (sz < _shortest) {
056                    _shortest = sz;
057                }
058                if (sz > _longest) {
059                    _longest = sz;
060                }
061            }
062        }
063        shortest = _shortest;
064        longest = _longest;
065    }
066
067    /**
068     * {@inheritDoc}
069     */
070    @Override
071    public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
072        // check if translation exists for the input at position index
073        if (prefixSet.contains(input.charAt(index))) {
074            int max = longest;
075            if (index + longest > input.length()) {
076                max = input.length() - index;
077            }
078            // implement greedy algorithm by trying maximum match first
079            for (int i = max; i >= shortest; i--) {
080                final CharSequence subSeq = input.subSequence(index, index + i);
081                final String result = lookupMap.get(subSeq.toString());
082
083                if (result != null) {
084                    out.write(result);
085                    return i;
086                }
087            }
088        }
089        return 0;
090    }
091}