CharSequenceTranslator.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.text.translate;

  18. import java.io.IOException;
  19. import java.io.StringWriter;
  20. import java.io.Writer;
  21. import java.util.Locale;

  22. /**
  23.  * An API for translating text.
  24.  * Its core use is to escape and unescape text. Because escaping and unescaping
  25.  * is completely contextual, the API does not present two separate signatures.
  26.  *
  27.  * @since 1.0
  28.  */
  29. public abstract class CharSequenceTranslator {

  30.     static final char[] HEX_DIGITS = new char[] {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'};

  31.     /**
  32.      * Translate a set of codepoints, represented by an int index into a CharSequence,
  33.      * into another set of codepoints. The number of codepoints consumed must be returned,
  34.      * and the only IOExceptions thrown must be from interacting with the Writer so that
  35.      * the top level API may reliably ignore StringWriter IOExceptions.
  36.      *
  37.      * @param input CharSequence that is being translated
  38.      * @param index int representing the current point of translation
  39.      * @param out Writer to translate the text to
  40.      * @return int count of codepoints consumed
  41.      * @throws IOException if and only if the Writer produces an IOException
  42.      */
  43.     public abstract int translate(CharSequence input, int index, Writer out) throws IOException;

  44.     /**
  45.      * Helper for non-Writer usage.
  46.      * @param input CharSequence to be translated
  47.      * @return String output of translation
  48.      */
  49.     public final String translate(final CharSequence input) {
  50.         if (input == null) {
  51.             return null;
  52.         }
  53.         try {
  54.             final StringWriter writer = new StringWriter(input.length() * 2);
  55.             translate(input, writer);
  56.             return writer.toString();
  57.         } catch (final IOException ioe) {
  58.             // this should never ever happen while writing to a StringWriter
  59.             throw new RuntimeException(ioe);
  60.         }
  61.     }

  62.     /**
  63.      * Translate an input onto a Writer. This is intentionally final as its algorithm is
  64.      * tightly coupled with the abstract method of this class.
  65.      *
  66.      * @param input CharSequence that is being translated
  67.      * @param out Writer to translate the text to
  68.      * @throws IOException if and only if the Writer produces an IOException
  69.      */
  70.     public final void translate(final CharSequence input, final Writer out) throws IOException {
  71.         if (out == null) {
  72.             throw new IllegalArgumentException("The Writer must not be null");
  73.         }
  74.         if (input == null) {
  75.             return;
  76.         }
  77.         int pos = 0;
  78.         final int len = input.length();
  79.         while (pos < len) {
  80.             final int consumed = translate(input, pos, out);
  81.             if (consumed == 0) {
  82.                 // inlined implementation of Character.toChars(Character.codePointAt(input, pos))
  83.                 // avoids allocating temp char arrays and duplicate checks
  84.                 final char c1 = input.charAt(pos);
  85.                 out.write(c1);
  86.                 pos++;
  87.                 if (Character.isHighSurrogate(c1) && pos < len) {
  88.                     final char c2 = input.charAt(pos);
  89.                     if (Character.isLowSurrogate(c2)) {
  90.                       out.write(c2);
  91.                       pos++;
  92.                     }
  93.                 }
  94.                 continue;
  95.             }
  96.             // contract with translators is that they have to understand codepoints
  97.             // and they just took care of a surrogate pair
  98.             for (int pt = 0; pt < consumed; pt++) {
  99.                 pos += Character.charCount(Character.codePointAt(input, pos));
  100.             }
  101.         }
  102.     }

  103.     /**
  104.      * Helper method to create a merger of this translator with another set of
  105.      * translators. Useful in customizing the standard functionality.
  106.      *
  107.      * @param translators CharSequenceTranslator array of translators to merge with this one
  108.      * @return CharSequenceTranslator merging this translator with the others
  109.      */
  110.     public final CharSequenceTranslator with(final CharSequenceTranslator... translators) {
  111.         final CharSequenceTranslator[] newArray = new CharSequenceTranslator[translators.length + 1];
  112.         newArray[0] = this;
  113.         System.arraycopy(translators, 0, newArray, 1, translators.length);
  114.         return new AggregateTranslator(newArray);
  115.     }

  116.     /**
  117.      * <p>Returns an upper case hexadecimal <code>String</code> for the given
  118.      * character.</p>
  119.      *
  120.      * @param codepoint The codepoint to convert.
  121.      * @return An upper case hexadecimal <code>String</code>
  122.      */
  123.     public static String hex(final int codepoint) {
  124.         return Integer.toHexString(codepoint).toUpperCase(Locale.ENGLISH);
  125.     }

  126. }