/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.text.translate;

import java.io.IOException;
import java.io.Writer;

/**
 * Writes code points as Unicode escape sequences, restricted to a
 * configurable code point range.
 *
 * <p>An instance either escapes the code points that fall inside the closed
 * range {@code [below, above]} or the code points that fall outside of it,
 * depending on the {@code between} flag it was created with.</p>
 *
 * @since 1.0
 */
public class UnicodeEscaper extends CodePointTranslator {

    /** Lower code point boundary of the configured range. */
    private final int below;
    /** Upper code point boundary of the configured range. */
    private final int above;
    /** {@code true} to escape inside the range, {@code false} to escape outside of it. */
    private final boolean between;

    /**
     * <p>Creates a {@code UnicodeEscaper} whose range runs from {@code 0} to
     * {@link Integer#MAX_VALUE}, escaping everything inside that range.</p>
     */
    public UnicodeEscaper() {
        this(0, Integer.MAX_VALUE, true);
    }

    /**
     * <p>Creates a {@code UnicodeEscaper} for the given range. Every other
     * constructor and static factory of this class ends up here.</p>
     *
     * <p>With {@code between == true} the code points {@code below} and
     * {@code above} themselves are escaped (inclusive range); with
     * {@code between == false} they are not escaped (only code points
     * strictly outside the range are).</p>
     *
     * @param below int value representing the lowest codepoint boundary
     * @param above int value representing the highest codepoint boundary
     * @param between whether to escape between the boundaries or outside them
     */
    protected UnicodeEscaper(final int below, final int above, final boolean between) {
        this.below = below;
        this.above = above;
        this.between = between;
    }

    /**
     * <p>Creates a {@code UnicodeEscaper} that escapes code points strictly
     * less than the given value.</p>
     *
     * @param codepoint below which to escape
     * @return The newly created {@code UnicodeEscaper} instance
     */
    public static UnicodeEscaper below(final int codepoint) {
        // Same as outsideOf(codepoint, Integer.MAX_VALUE).
        return new UnicodeEscaper(codepoint, Integer.MAX_VALUE, false);
    }

    /**
     * <p>Creates a {@code UnicodeEscaper} that escapes code points strictly
     * greater than the given value.</p>
     *
     * @param codepoint above which to escape
     * @return The newly created {@code UnicodeEscaper} instance
     */
    public static UnicodeEscaper above(final int codepoint) {
        // Same as outsideOf(0, codepoint).
        return new UnicodeEscaper(0, codepoint, false);
    }

    /**
     * <p>Creates a {@code UnicodeEscaper} that escapes code points lying
     * outside of the given range; the two boundary values themselves are
     * not escaped.</p>
     *
     * @param codepointLow below which to escape
     * @param codepointHigh above which to escape
     * @return The newly created {@code UnicodeEscaper} instance
     */
    public static UnicodeEscaper outsideOf(final int codepointLow, final int codepointHigh) {
        return new UnicodeEscaper(codepointLow, codepointHigh, false);
    }

    /**
     * <p>Creates a {@code UnicodeEscaper} that escapes code points lying
     * inside the given range; the two boundary values themselves are
     * escaped as well.</p>
     *
     * @param codepointLow above which to escape
     * @param codepointHigh below which to escape
     * @return The newly created {@code UnicodeEscaper} instance
     */
    public static UnicodeEscaper between(final int codepointLow, final int codepointHigh) {
        return new UnicodeEscaper(codepointLow, codepointHigh, true);
    }

    /**
     * {@inheritDoc}
     *
     * <p>Returns {@code false} without writing anything when the code point
     * is not selected by the configured range. Otherwise an escape is
     * written: code points up to {@code 0xffff} are emitted as {@code \}{@code u}
     * followed by four digits taken from {@code HEX_DIGITS}, larger ones
     * are emitted through {@link #toUtf16Escape(int)}.</p>
     */
    @Override
    public boolean translate(final int codepoint, final Writer out) throws IOException {
        // A code point is escaped only when "is inside [below, above]"
        // agrees with the configured mode.
        final boolean insideRange = codepoint >= below && codepoint <= above;
        if (insideRange != between) {
            return false;
        }

        if (codepoint <= 0xffff) {
            out.write("\\u");
            // Emit the four nibbles of the low 16 bits, most significant first.
            for (int shift = 12; shift >= 0; shift -= 4) {
                out.write(HEX_DIGITS[(codepoint >> shift) & 15]);
            }
        } else {
            out.write(toUtf16Escape(codepoint));
        }
        return true;
    }

    /**
     * Builds the escape text used by {@link #translate(int, Writer)} for
     * code points greater than {@code 0xffff}: the prefix {@code "\\u"}
     * followed by the result of {@code hex(codepoint)}.
     *
     * @param codepoint
     *            a Unicode code point
     * @return The escape string for the given codepoint
     */
    protected String toUtf16Escape(final int codepoint) {
        return "\\u" + hex(codepoint);
    }
}