1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.lang3.text.translate;
18
19 import java.io.IOException;
20 import java.io.Writer;
21
22 /**
23 * Translates codepoints to their Unicode escaped value.
24 *
25 * @since 3.0
26 * @version $Id: UnicodeEscaper.java 1448287 2013-02-20 16:47:42Z tn $
27 */
28 public class UnicodeEscaper extends CodePointTranslator {
29
30 private final int below;
31 private final int above;
32 private final boolean between;
33
34 /**
35 * <p>Constructs a <code>UnicodeEscaper</code> for all characters. </p>
36 */
37 public UnicodeEscaper(){
38 this(0, Integer.MAX_VALUE, true);
39 }
40
41 /**
42 * <p>Constructs a <code>UnicodeEscaper</code> for the specified range. This is
43 * the underlying method for the other constructors/builders. The <code>below</code>
44 * and <code>above</code> boundaries are inclusive when <code>between</code> is
45 * <code>true</code> and exclusive when it is <code>false</code>. </p>
46 *
47 * @param below int value representing the lowest codepoint boundary
48 * @param above int value representing the highest codepoint boundary
49 * @param between whether to escape between the boundaries or outside them
50 */
51 protected UnicodeEscaper(final int below, final int above, final boolean between) {
52 this.below = below;
53 this.above = above;
54 this.between = between;
55 }
56
57 /**
58 * <p>Constructs a <code>UnicodeEscaper</code> below the specified value (exclusive). </p>
59 *
60 * @param codepoint below which to escape
61 * @return the newly created {@code UnicodeEscaper} instance
62 */
63 public static UnicodeEscaper below(final int codepoint) {
64 return outsideOf(codepoint, Integer.MAX_VALUE);
65 }
66
67 /**
68 * <p>Constructs a <code>UnicodeEscaper</code> above the specified value (exclusive). </p>
69 *
70 * @param codepoint above which to escape
71 * @return the newly created {@code UnicodeEscaper} instance
72 */
73 public static UnicodeEscaper above(final int codepoint) {
74 return outsideOf(0, codepoint);
75 }
76
77 /**
78 * <p>Constructs a <code>UnicodeEscaper</code> outside of the specified values (exclusive). </p>
79 *
80 * @param codepointLow below which to escape
81 * @param codepointHigh above which to escape
82 * @return the newly created {@code UnicodeEscaper} instance
83 */
84 public static UnicodeEscaper outsideOf(final int codepointLow, final int codepointHigh) {
85 return new UnicodeEscaper(codepointLow, codepointHigh, false);
86 }
87
88 /**
89 * <p>Constructs a <code>UnicodeEscaper</code> between the specified values (inclusive). </p>
90 *
91 * @param codepointLow above which to escape
92 * @param codepointHigh below which to escape
93 * @return the newly created {@code UnicodeEscaper} instance
94 */
95 public static UnicodeEscaper between(final int codepointLow, final int codepointHigh) {
96 return new UnicodeEscaper(codepointLow, codepointHigh, true);
97 }
98
99 /**
100 * {@inheritDoc}
101 */
102 @Override
103 public boolean translate(final int codepoint, final Writer out) throws IOException {
104 if (between) {
105 if (codepoint < below || codepoint > above) {
106 return false;
107 }
108 } else {
109 if (codepoint >= below && codepoint <= above) {
110 return false;
111 }
112 }
113
114 // TODO: Handle potential + sign per various Unicode escape implementations
115 if (codepoint > 0xffff) {
116 out.write(toUtf16Escape(codepoint));
117 } else if (codepoint > 0xfff) {
118 out.write("\\u" + hex(codepoint));
119 } else if (codepoint > 0xff) {
120 out.write("\\u0" + hex(codepoint));
121 } else if (codepoint > 0xf) {
122 out.write("\\u00" + hex(codepoint));
123 } else {
124 out.write("\\u000" + hex(codepoint));
125 }
126 return true;
127 }
128
129 /**
130 * Converts the given codepoint to a hex string of the form {@code "\\uXXXX"}
131 *
132 * @param codepoint
133 * a Unicode code point
134 * @return the hex string for the given codepoint
135 */
136 protected String toUtf16Escape(final int codepoint) {
137 return "\\u" + hex(codepoint);
138 }
139 }