1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.text.translate;
18
19 import java.io.IOException;
20 import java.io.Writer;
21
22 /**
23 * Translates codepoints to their Unicode escaped value.
24 *
25 * @since 1.0
26 */
27 public class UnicodeEscaper extends CodePointTranslator {
28
29 private final int below;
30 private final int above;
31 private final boolean between;
32
33 /**
34 * <p>Constructs a <code>UnicodeEscaper</code> for all characters. </p>
35 */
36 public UnicodeEscaper(){
37 this(0, Integer.MAX_VALUE, true);
38 }
39
40 /**
41 * <p>Constructs a <code>UnicodeEscaper</code> for the specified range. This is
42 * the underlying method for the other constructors/builders. The <code>below</code>
43 * and <code>above</code> boundaries are inclusive when <code>between</code> is
44 * <code>true</code> and exclusive when it is <code>false</code>. </p>
45 *
46 * @param below int value representing the lowest codepoint boundary
47 * @param above int value representing the highest codepoint boundary
48 * @param between whether to escape between the boundaries or outside them
49 */
50 protected UnicodeEscaper(final int below, final int above, final boolean between) {
51 this.below = below;
52 this.above = above;
53 this.between = between;
54 }
55
56 /**
57 * <p>Constructs a <code>UnicodeEscaper</code> below the specified value (exclusive). </p>
58 *
59 * @param codepoint below which to escape
60 * @return the newly created {@code UnicodeEscaper} instance
61 */
62 public static UnicodeEscaper below(final int codepoint) {
63 return outsideOf(codepoint, Integer.MAX_VALUE);
64 }
65
66 /**
67 * <p>Constructs a <code>UnicodeEscaper</code> above the specified value (exclusive). </p>
68 *
69 * @param codepoint above which to escape
70 * @return the newly created {@code UnicodeEscaper} instance
71 */
72 public static UnicodeEscaper above(final int codepoint) {
73 return outsideOf(0, codepoint);
74 }
75
76 /**
77 * <p>Constructs a <code>UnicodeEscaper</code> outside of the specified values (exclusive). </p>
78 *
79 * @param codepointLow below which to escape
80 * @param codepointHigh above which to escape
81 * @return the newly created {@code UnicodeEscaper} instance
82 */
83 public static UnicodeEscaper outsideOf(final int codepointLow, final int codepointHigh) {
84 return new UnicodeEscaper(codepointLow, codepointHigh, false);
85 }
86
87 /**
88 * <p>Constructs a <code>UnicodeEscaper</code> between the specified values (inclusive). </p>
89 *
90 * @param codepointLow above which to escape
91 * @param codepointHigh below which to escape
92 * @return the newly created {@code UnicodeEscaper} instance
93 */
94 public static UnicodeEscaper between(final int codepointLow, final int codepointHigh) {
95 return new UnicodeEscaper(codepointLow, codepointHigh, true);
96 }
97
98 /**
99 * {@inheritDoc}
100 */
101 @Override
102 public boolean translate(final int codepoint, final Writer out) throws IOException {
103 if (between) {
104 if (codepoint < below || codepoint > above) {
105 return false;
106 }
107 } else {
108 if (codepoint >= below && codepoint <= above) {
109 return false;
110 }
111 }
112
113 // TODO: Handle potential + sign per various Unicode escape implementations
114 if (codepoint > 0xffff) {
115 out.write(toUtf16Escape(codepoint));
116 } else {
117 out.write("\\u");
118 out.write(HEX_DIGITS[(codepoint >> 12) & 15]);
119 out.write(HEX_DIGITS[(codepoint >> 8) & 15]);
120 out.write(HEX_DIGITS[(codepoint >> 4) & 15]);
121 out.write(HEX_DIGITS[(codepoint) & 15]);
122 }
123 return true;
124 }
125
126 /**
127 * Converts the given codepoint to a hex string of the form {@code "\\uXXXX"}
128 *
129 * @param codepoint
130 * a Unicode code point
131 * @return the hex string for the given codepoint
132 *
133 */
134 protected String toUtf16Escape(final int codepoint) {
135 return "\\u" + hex(codepoint);
136 }
137 }