View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.text.translate;
18  
19  import java.io.IOException;
20  import java.io.Writer;
21  
22  /**
23   * Translates codepoints to their Unicode escaped value.
24   *
25   * @since 1.0
26   */
27  public class UnicodeEscaper extends CodePointTranslator {
28  
29      private final int below;
30      private final int above;
31      private final boolean between;
32  
33      /**
34       * <p>Constructs a <code>UnicodeEscaper</code> for all characters. </p>
35       */
36      public UnicodeEscaper(){
37          this(0, Integer.MAX_VALUE, true);
38      }
39  
40      /**
41       * <p>Constructs a <code>UnicodeEscaper</code> for the specified range. This is
42       * the underlying method for the other constructors/builders. The <code>below</code>
43       * and <code>above</code> boundaries are inclusive when <code>between</code> is
44       * <code>true</code> and exclusive when it is <code>false</code>. </p>
45       *
46       * @param below int value representing the lowest codepoint boundary
47       * @param above int value representing the highest codepoint boundary
48       * @param between whether to escape between the boundaries or outside them
49       */
50      protected UnicodeEscaper(final int below, final int above, final boolean between) {
51          this.below = below;
52          this.above = above;
53          this.between = between;
54      }
55  
56      /**
57       * <p>Constructs a <code>UnicodeEscaper</code> below the specified value (exclusive). </p>
58       *
59       * @param codepoint below which to escape
60       * @return the newly created {@code UnicodeEscaper} instance
61       */
62      public static UnicodeEscaper below(final int codepoint) {
63          return outsideOf(codepoint, Integer.MAX_VALUE);
64      }
65  
66      /**
67       * <p>Constructs a <code>UnicodeEscaper</code> above the specified value (exclusive). </p>
68       *
69       * @param codepoint above which to escape
70       * @return the newly created {@code UnicodeEscaper} instance
71       */
72      public static UnicodeEscaper above(final int codepoint) {
73          return outsideOf(0, codepoint);
74      }
75  
76      /**
77       * <p>Constructs a <code>UnicodeEscaper</code> outside of the specified values (exclusive). </p>
78       *
79       * @param codepointLow below which to escape
80       * @param codepointHigh above which to escape
81       * @return the newly created {@code UnicodeEscaper} instance
82       */
83      public static UnicodeEscaper outsideOf(final int codepointLow, final int codepointHigh) {
84          return new UnicodeEscaper(codepointLow, codepointHigh, false);
85      }
86  
87      /**
88       * <p>Constructs a <code>UnicodeEscaper</code> between the specified values (inclusive). </p>
89       *
90       * @param codepointLow above which to escape
91       * @param codepointHigh below which to escape
92       * @return the newly created {@code UnicodeEscaper} instance
93       */
94      public static UnicodeEscaper between(final int codepointLow, final int codepointHigh) {
95          return new UnicodeEscaper(codepointLow, codepointHigh, true);
96      }
97  
98      /**
99       * {@inheritDoc}
100      */
101     @Override
102     public boolean translate(final int codepoint, final Writer out) throws IOException {
103         if (between) {
104             if (codepoint < below || codepoint > above) {
105                 return false;
106             }
107         } else {
108             if (codepoint >= below && codepoint <= above) {
109                 return false;
110             }
111         }
112 
113         // TODO: Handle potential + sign per various Unicode escape implementations
114         if (codepoint > 0xffff) {
115             out.write(toUtf16Escape(codepoint));
116         } else {
117           out.write("\\u");
118           out.write(HEX_DIGITS[(codepoint >> 12) & 15]);
119           out.write(HEX_DIGITS[(codepoint >> 8) & 15]);
120           out.write(HEX_DIGITS[(codepoint >> 4) & 15]);
121           out.write(HEX_DIGITS[(codepoint) & 15]);
122         }
123         return true;
124     }
125 
126     /**
127      * Converts the given codepoint to a hex string of the form {@code "\\uXXXX"}
128      * 
129      * @param codepoint
130      *            a Unicode code point
131      * @return the hex string for the given codepoint
132      *
133      */
134     protected String toUtf16Escape(final int codepoint) {
135         return "\\u" + hex(codepoint);
136     }
137 }