CharSequenceTranslator.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.lang3.text.translate;

  18. import java.io.IOException;
  19. import java.io.StringWriter;
  20. import java.io.UncheckedIOException;
  21. import java.io.Writer;
  22. import java.util.Locale;
  23. import java.util.Objects;

  24. import org.apache.commons.lang3.ArrayUtils;

  25. /**
  26.  * An API for translating text.
  27.  * Its core use is to escape and unescape text. Because escaping and unescaping
  28.  * is completely contextual, the API does not present two separate signatures.
  29.  *
  30.  * @since 3.0
  31.  * @deprecated As of 3.6, use Apache Commons Text
  32.  * <a href="https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/translate/CharSequenceTranslator.html">
  33.  * CharSequenceTranslator</a> instead
  34.  */
  35. @Deprecated
  36. public abstract class CharSequenceTranslator {

  37.     static final char[] HEX_DIGITS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};

  38.     /**
  39.      * Returns an upper case hexadecimal {@link String} for the given
  40.      * character.
  41.      *
  42.      * @param codePoint The code point to convert.
  43.      * @return An upper case hexadecimal {@link String}
  44.      */
  45.     public static String hex(final int codePoint) {
  46.         return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
  47.     }

  48.     /**
  49.      * Constructs a new instance.
  50.      */
  51.     public CharSequenceTranslator() {
  52.         // empty
  53.     }

  54.     /**
  55.      * Helper for non-Writer usage.
  56.      * @param input CharSequence to be translated
  57.      * @return String output of translation
  58.      */
  59.     public final String translate(final CharSequence input) {
  60.         if (input == null) {
  61.             return null;
  62.         }
  63.         try {
  64.             final StringWriter writer = new StringWriter(input.length() * 2);
  65.             translate(input, writer);
  66.             return writer.toString();
  67.         } catch (final IOException ioe) {
  68.             // this should never ever happen while writing to a StringWriter
  69.             throw new UncheckedIOException(ioe);
  70.         }
  71.     }

  72.     /**
  73.      * Translate a set of code points, represented by an int index into a CharSequence,
  74.      * into another set of code points. The number of code points consumed must be returned,
  75.      * and the only IOExceptions thrown must be from interacting with the Writer so that
  76.      * the top level API may reliably ignore StringWriter IOExceptions.
  77.      *
  78.      * @param input CharSequence that is being translated
  79.      * @param index int representing the current point of translation
  80.      * @param out Writer to translate the text to
  81.      * @return int count of code points consumed
  82.      * @throws IOException if and only if the Writer produces an IOException
  83.      */
  84.     public abstract int translate(CharSequence input, int index, Writer out) throws IOException;

  85.     /**
  86.      * Translate an input onto a Writer. This is intentionally final as its algorithm is
  87.      * tightly coupled with the abstract method of this class.
  88.      *
  89.      * @param input CharSequence that is being translated
  90.      * @param writer Writer to translate the text to
  91.      * @throws IOException if and only if the Writer produces an IOException
  92.      */
  93.     @SuppressWarnings("resource") // Caller closes writer
  94.     public final void translate(final CharSequence input, final Writer writer) throws IOException {
  95.         Objects.requireNonNull(writer, "writer");
  96.         if (input == null) {
  97.             return;
  98.         }
  99.         int pos = 0;
  100.         final int len = input.length();
  101.         while (pos < len) {
  102.             final int consumed = translate(input, pos, writer);
  103.             if (consumed == 0) {
  104.                 // inlined implementation of Character.toChars(Character.codePointAt(input, pos))
  105.                 // avoids allocating temp char arrays and duplicate checks
  106.                 final char c1 = input.charAt(pos);
  107.                 writer.write(c1);
  108.                 pos++;
  109.                 if (Character.isHighSurrogate(c1) && pos < len) {
  110.                     final char c2 = input.charAt(pos);
  111.                     if (Character.isLowSurrogate(c2)) {
  112.                       writer.write(c2);
  113.                       pos++;
  114.                     }
  115.                 }
  116.                 continue;
  117.             }
  118.             // contract with translators is that they have to understand code points
  119.             // and they just took care of a surrogate pair
  120.             for (int pt = 0; pt < consumed; pt++) {
  121.                 pos += Character.charCount(Character.codePointAt(input, pos));
  122.             }
  123.         }
  124.     }

  125.     /**
  126.      * Helper method to create a merger of this translator with another set of
  127.      * translators. Useful in customizing the standard functionality.
  128.      *
  129.      * @param translators CharSequenceTranslator array of translators to merge with this one
  130.      * @return CharSequenceTranslator merging this translator with the others
  131.      */
  132.     public final CharSequenceTranslator with(final CharSequenceTranslator... translators) {
  133.         final CharSequenceTranslator[] newArray = new CharSequenceTranslator[translators.length + 1];
  134.         newArray[0] = this;
  135.         return new AggregateTranslator(ArrayUtils.arraycopy(translators, 0, newArray, 1, translators.length));
  136.     }

  137. }