UnicodeEscaper.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.text.translate;

  18. import java.io.IOException;
  19. import java.io.Writer;

  20. /**
  21.  * Translates code points to their Unicode escaped value.
  22.  *
  23.  * @since 1.0
  24.  */
  25. public class UnicodeEscaper extends CodePointTranslator {

  26.     /**
  27.      * Constructs a {@code UnicodeEscaper} above the specified value (exclusive).
  28.      *
  29.      * @param codePoint above which to escape
  30.      * @return The newly created {@code UnicodeEscaper} instance
  31.      */
  32.     public static UnicodeEscaper above(final int codePoint) {
  33.         return outsideOf(0, codePoint);
  34.     }
  35.     /**
  36.      * Constructs a {@code UnicodeEscaper} below the specified value (exclusive).
  37.      *
  38.      * @param codePoint below which to escape
  39.      * @return The newly created {@code UnicodeEscaper} instance
  40.      */
  41.     public static UnicodeEscaper below(final int codePoint) {
  42.         return outsideOf(codePoint, Integer.MAX_VALUE);
  43.     }
  44.     /**
  45.      * Constructs a {@code UnicodeEscaper} between the specified values (inclusive).
  46.      *
  47.      * @param codePointLow above which to escape
  48.      * @param codePointHigh below which to escape
  49.      * @return The newly created {@code UnicodeEscaper} instance
  50.      */
  51.     public static UnicodeEscaper between(final int codePointLow, final int codePointHigh) {
  52.         return new UnicodeEscaper(codePointLow, codePointHigh, true);
  53.     }

  54.     /**
  55.      * Constructs a {@code UnicodeEscaper} outside of the specified values (exclusive).
  56.      *
  57.      * @param codePointLow below which to escape
  58.      * @param codePointHigh above which to escape
  59.      * @return The newly created {@code UnicodeEscaper} instance
  60.      */
  61.     public static UnicodeEscaper outsideOf(final int codePointLow, final int codePointHigh) {
  62.         return new UnicodeEscaper(codePointLow, codePointHigh, false);
  63.     }

  64.     /** The lowest code point boundary. */
  65.     private final int below;

  66.     /** The highest code point boundary. */
  67.     private final int above;

  68.     /** Whether to escape between the boundaries or outside them. */
  69.     private final boolean between;

  70.     /**
  71.      * Constructs a {@code UnicodeEscaper} for all characters.
  72.      */
  73.     public UnicodeEscaper() {
  74.         this(0, Integer.MAX_VALUE, true);
  75.     }

  76.     /**
  77.      * Constructs a {@code UnicodeEscaper} for the specified range. This is
  78.      * the underlying method for the other constructors/builders. The {@code below}
  79.      * and {@code above} boundaries are inclusive when {@code between} is
  80.      * {@code true} and exclusive when it is {@code false}.
  81.      *
  82.      * @param below int value representing the lowest code point boundary
  83.      * @param above int value representing the highest code point boundary
  84.      * @param between whether to escape between the boundaries or outside them
  85.      */
  86.     protected UnicodeEscaper(final int below, final int above, final boolean between) {
  87.         this.below = below;
  88.         this.above = above;
  89.         this.between = between;
  90.     }

  91.     /**
  92.      * Converts the given code point to a hexadecimal string of the form {@code "\\uXXXX"}.
  93.      *
  94.      * @param codePoint
  95.      *            a Unicode code point
  96.      * @return The hexadecimal string for the given code point
  97.      */
  98.     protected String toUtf16Escape(final int codePoint) {
  99.         return "\\u" + hex(codePoint);
  100.     }

  101.     /**
  102.      * {@inheritDoc}
  103.      */
  104.     @Override
  105.     public boolean translate(final int codePoint, final Writer writer) throws IOException {
  106.         if (between) {
  107.             if (codePoint < below || codePoint > above) {
  108.                 return false;
  109.             }
  110.         } else if (codePoint >= below && codePoint <= above) {
  111.             return false;
  112.         }

  113.         if (codePoint > 0xffff) {
  114.             writer.write(toUtf16Escape(codePoint));
  115.         } else {
  116.           writer.write("\\u");
  117.           writer.write(HEX_DIGITS[codePoint >> 12 & 15]);
  118.           writer.write(HEX_DIGITS[codePoint >> 8 & 15]);
  119.           writer.write(HEX_DIGITS[codePoint >> 4 & 15]);
  120.           writer.write(HEX_DIGITS[codePoint & 15]);
  121.         }
  122.         return true;
  123.     }
  124. }