/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.text.translate;

import java.io.IOException;
import java.io.Writer;

/**
 * Writes codepoints out as Unicode escape sequences.
 *
 * <p>An instance is configured with a lower boundary, an upper boundary and a
 * mode flag. In "between" mode a codepoint is escaped when it lies inside the
 * boundaries (boundaries included); otherwise it is escaped when it lies
 * outside them (boundaries excluded).</p>
 *
 * @since 1.0
 */
public class UnicodeEscaper extends CodePointTranslator {

    /** Lower boundary of the configured codepoint range. */
    private final int below;
    /** Upper boundary of the configured codepoint range. */
    private final int above;
    /** {@code true} to escape inside the range, {@code false} to escape outside it. */
    private final boolean between;

    /**
     * Creates a {@code UnicodeEscaper} covering the range
     * {@code 0} to {@link Integer#MAX_VALUE}, both inclusive.
     */
    public UnicodeEscaper() {
        this(0, Integer.MAX_VALUE, true);
    }

    /**
     * Creates a {@code UnicodeEscaper} for the given range; every other
     * constructor and factory method of this class ends up here.
     *
     * <p>When {@code between} is {@code true} the two boundaries are part of
     * the escaped set (inclusive); when it is {@code false} they are not
     * (exclusive).</p>
     *
     * @param below int value representing the lowest codepoint boundary
     * @param above int value representing the highest codepoint boundary
     * @param between whether to escape between the boundaries or outside them
     */
    protected UnicodeEscaper(final int below, final int above, final boolean between) {
        this.between = between;
        this.above = above;
        this.below = below;
    }

    /**
     * Creates a {@code UnicodeEscaper} that escapes codepoints strictly less
     * than the given value.
     *
     * @param codepoint below which to escape
     * @return the newly created {@code UnicodeEscaper} instance
     */
    public static UnicodeEscaper below(final int codepoint) {
        // Same as outsideOf(codepoint, Integer.MAX_VALUE): nothing can exceed the upper bound.
        return new UnicodeEscaper(codepoint, Integer.MAX_VALUE, false);
    }

    /**
     * Creates a {@code UnicodeEscaper} that escapes codepoints strictly greater
     * than the given value.
     *
     * @param codepoint above which to escape
     * @return the newly created {@code UnicodeEscaper} instance
     */
    public static UnicodeEscaper above(final int codepoint) {
        // Same as outsideOf(0, codepoint): the lower bound is fixed at zero.
        return new UnicodeEscaper(0, codepoint, false);
    }

    /**
     * Creates a {@code UnicodeEscaper} that escapes codepoints lying outside
     * the two given values; the values themselves are not escaped.
     *
     * @param codepointLow below which to escape
     * @param codepointHigh above which to escape
     * @return the newly created {@code UnicodeEscaper} instance
     */
    public static UnicodeEscaper outsideOf(final int codepointLow, final int codepointHigh) {
        return new UnicodeEscaper(codepointLow, codepointHigh, false);
    }

    /**
     * Creates a {@code UnicodeEscaper} that escapes codepoints lying between
     * the two given values; the values themselves are escaped as well.
     *
     * @param codepointLow above which to escape
     * @param codepointHigh below which to escape
     * @return the newly created {@code UnicodeEscaper} instance
     */
    public static UnicodeEscaper between(final int codepointLow, final int codepointHigh) {
        return new UnicodeEscaper(codepointLow, codepointHigh, true);
    }

    /**
     * {@inheritDoc}
     *
     * <p>Returns {@code false} without writing anything when the codepoint is
     * not selected by the configured range. Otherwise a codepoint greater than
     * {@code 0xffff} is written using {@link #toUtf16Escape(int)}, and any
     * other codepoint is written as {@code "\\u"} followed by four entries of
     * {@code HEX_DIGITS}, most significant nibble first.</p>
     */
    @Override
    public boolean translate(final int codepoint, final Writer out) throws IOException {
        final boolean inRange = codepoint >= below && codepoint <= above;
        // "between" mode escapes inside the range, the other mode escapes outside it;
        // in both cases a mismatch between the mode flag and inRange means "leave alone".
        if (between != inRange) {
            return false;
        }

        // TODO: Handle potential + sign per various Unicode escape implementations
        if (codepoint > 0xffff) {
            out.write(toUtf16Escape(codepoint));
            return true;
        }

        out.write("\\u");
        // Emit the four nibbles from the highest (bits 12-15) down to the lowest (bits 0-3).
        for (int shift = 12; shift >= 0; shift -= 4) {
            out.write(HEX_DIGITS[(codepoint >> shift) & 0xF]);
        }
        return true;
    }

    /**
     * Builds the escape text for the given codepoint by prefixing the result of
     * {@code hex(codepoint)} with {@code "\\u"}.
     *
     * @param codepoint
     *            a Unicode code point
     * @return the hex string for the given codepoint
     */
    protected String toUtf16Escape(final int codepoint) {
        return "\\u" + hex(codepoint);
    }
}