View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.lang3.text.translate;
18  
19  import java.io.IOException;
20  import java.io.Writer;
21  
22  /**
23   * Translates codepoints to their Unicode escaped value.
24   *
25   * @since 3.0
26   * @version $Id: UnicodeEscaper.java 1448287 2013-02-20 16:47:42Z tn $
27   */
28  public class UnicodeEscaper extends CodePointTranslator {
29  
30      private final int below;
31      private final int above;
32      private final boolean between;
33  
34      /**
35       * <p>Constructs a <code>UnicodeEscaper</code> for all characters. </p>
36       */
37      public UnicodeEscaper(){
38          this(0, Integer.MAX_VALUE, true);
39      }
40  
41      /**
42       * <p>Constructs a <code>UnicodeEscaper</code> for the specified range. This is
43       * the underlying method for the other constructors/builders. The <code>below</code>
44       * and <code>above</code> boundaries are inclusive when <code>between</code> is
45       * <code>true</code> and exclusive when it is <code>false</code>. </p>
46       *
47       * @param below int value representing the lowest codepoint boundary
48       * @param above int value representing the highest codepoint boundary
49       * @param between whether to escape between the boundaries or outside them
50       */
51      protected UnicodeEscaper(final int below, final int above, final boolean between) {
52          this.below = below;
53          this.above = above;
54          this.between = between;
55      }
56  
57      /**
58       * <p>Constructs a <code>UnicodeEscaper</code> below the specified value (exclusive). </p>
59       *
60       * @param codepoint below which to escape
61       * @return the newly created {@code UnicodeEscaper} instance
62       */
63      public static UnicodeEscaper below(final int codepoint) {
64          return outsideOf(codepoint, Integer.MAX_VALUE);
65      }
66  
67      /**
68       * <p>Constructs a <code>UnicodeEscaper</code> above the specified value (exclusive). </p>
69       *
70       * @param codepoint above which to escape
71       * @return the newly created {@code UnicodeEscaper} instance
72       */
73      public static UnicodeEscaper above(final int codepoint) {
74          return outsideOf(0, codepoint);
75      }
76  
77      /**
78       * <p>Constructs a <code>UnicodeEscaper</code> outside of the specified values (exclusive). </p>
79       *
80       * @param codepointLow below which to escape
81       * @param codepointHigh above which to escape
82       * @return the newly created {@code UnicodeEscaper} instance
83       */
84      public static UnicodeEscaper outsideOf(final int codepointLow, final int codepointHigh) {
85          return new UnicodeEscaper(codepointLow, codepointHigh, false);
86      }
87  
88      /**
89       * <p>Constructs a <code>UnicodeEscaper</code> between the specified values (inclusive). </p>
90       *
91       * @param codepointLow above which to escape
92       * @param codepointHigh below which to escape
93       * @return the newly created {@code UnicodeEscaper} instance
94       */
95      public static UnicodeEscaper between(final int codepointLow, final int codepointHigh) {
96          return new UnicodeEscaper(codepointLow, codepointHigh, true);
97      }
98  
99      /**
100      * {@inheritDoc}
101      */
102     @Override
103     public boolean translate(final int codepoint, final Writer out) throws IOException {
104         if (between) {
105             if (codepoint < below || codepoint > above) {
106                 return false;
107             }
108         } else {
109             if (codepoint >= below && codepoint <= above) {
110                 return false;
111             }
112         }
113 
114         // TODO: Handle potential + sign per various Unicode escape implementations
115         if (codepoint > 0xffff) {
116             out.write(toUtf16Escape(codepoint));
117         } else if (codepoint > 0xfff) {
118             out.write("\\u" + hex(codepoint));
119         } else if (codepoint > 0xff) {
120             out.write("\\u0" + hex(codepoint));
121         } else if (codepoint > 0xf) {
122             out.write("\\u00" + hex(codepoint));
123         } else {
124             out.write("\\u000" + hex(codepoint));
125         }
126         return true;
127     }
128 
129     /**
130      * Converts the given codepoint to a hex string of the form {@code "\\uXXXX"}
131      * 
132      * @param codepoint
133      *            a Unicode code point
134      * @return the hex string for the given codepoint
135      */
136     protected String toUtf16Escape(final int codepoint) {
137         return "\\u" + hex(codepoint);
138     }
139 }