View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.text.translate;
18  
19  import java.io.IOException;
20  import java.io.Writer;
21  
22  /**
23   * Translates code points to their Unicode escaped value.
24   *
25   * @since 1.0
26   */
27  public class UnicodeEscaper extends CodePointTranslator {
28  
29      /**
30       * Constructs a {@code UnicodeEscaper} above the specified value (exclusive).
31       *
32       * @param codePoint above which to escape
33       * @return The newly created {@code UnicodeEscaper} instance
34       */
35      public static UnicodeEscaper above(final int codePoint) {
36          return outsideOf(0, codePoint);
37      }
38      /**
39       * Constructs a {@code UnicodeEscaper} below the specified value (exclusive).
40       *
41       * @param codePoint below which to escape
42       * @return The newly created {@code UnicodeEscaper} instance
43       */
44      public static UnicodeEscaper below(final int codePoint) {
45          return outsideOf(codePoint, Integer.MAX_VALUE);
46      }
47      /**
48       * Constructs a {@code UnicodeEscaper} between the specified values (inclusive).
49       *
50       * @param codePointLow above which to escape
51       * @param codePointHigh below which to escape
52       * @return The newly created {@code UnicodeEscaper} instance
53       */
54      public static UnicodeEscaper between(final int codePointLow, final int codePointHigh) {
55          return new UnicodeEscaper(codePointLow, codePointHigh, true);
56      }
57  
58      /**
59       * Constructs a {@code UnicodeEscaper} outside of the specified values (exclusive).
60       *
61       * @param codePointLow below which to escape
62       * @param codePointHigh above which to escape
63       * @return The newly created {@code UnicodeEscaper} instance
64       */
65      public static UnicodeEscaper outsideOf(final int codePointLow, final int codePointHigh) {
66          return new UnicodeEscaper(codePointLow, codePointHigh, false);
67      }
68  
69      /** The lowest code point boundary. */
70      private final int below;
71  
72      /** The highest code point boundary. */
73      private final int above;
74  
75      /** Whether to escape between the boundaries or outside them. */
76      private final boolean between;
77  
78      /**
79       * Constructs a {@code UnicodeEscaper} for all characters.
80       */
81      public UnicodeEscaper() {
82          this(0, Integer.MAX_VALUE, true);
83      }
84  
85      /**
86       * Constructs a {@code UnicodeEscaper} for the specified range. This is
87       * the underlying method for the other constructors/builders. The {@code below}
88       * and {@code above} boundaries are inclusive when {@code between} is
89       * {@code true} and exclusive when it is {@code false}.
90       *
91       * @param below int value representing the lowest code point boundary
92       * @param above int value representing the highest code point boundary
93       * @param between whether to escape between the boundaries or outside them
94       */
95      protected UnicodeEscaper(final int below, final int above, final boolean between) {
96          this.below = below;
97          this.above = above;
98          this.between = between;
99      }
100 
101     /**
102      * Converts the given code point to a hexadecimal string of the form {@code "\\uXXXX"}.
103      *
104      * @param codePoint
105      *            a Unicode code point
106      * @return The hexadecimal string for the given code point
107      */
108     protected String toUtf16Escape(final int codePoint) {
109         return "\\u" + hex(codePoint);
110     }
111 
112     /**
113      * {@inheritDoc}
114      */
115     @Override
116     public boolean translate(final int codePoint, final Writer writer) throws IOException {
117         if (between) {
118             if (codePoint < below || codePoint > above) {
119                 return false;
120             }
121         } else if (codePoint >= below && codePoint <= above) {
122             return false;
123         }
124 
125         if (codePoint > 0xffff) {
126             writer.write(toUtf16Escape(codePoint));
127         } else {
128           writer.write("\\u");
129           writer.write(HEX_DIGITS[codePoint >> 12 & 15]);
130           writer.write(HEX_DIGITS[codePoint >> 8 & 15]);
131           writer.write(HEX_DIGITS[codePoint >> 4 & 15]);
132           writer.write(HEX_DIGITS[codePoint & 15]);
133         }
134         return true;
135     }
136 }