/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.lang3.text.translate;

import java.io.IOException;
import java.io.Writer;

/**
 * Translates codepoints to their Unicode escaped value.
 *
 * <p>An instance is configured with a lower boundary, an upper boundary and a
 * flag that selects whether codepoints inside the boundaries or outside them
 * are escaped. Codepoints that are not selected are left untranslated.</p>
 *
 * @since 3.0
 * @version $Id: UnicodeEscaper.java 1552652 2013-12-20 13:23:16Z britter $
 */
public class UnicodeEscaper extends CodePointTranslator {

    /** Lower codepoint boundary of the configured range. */
    private final int below;

    /** Upper codepoint boundary of the configured range. */
    private final int above;

    /** {@code true} to escape inside the range, {@code false} to escape outside it. */
    private final boolean between;

    /**
     * <p>Constructs a <code>UnicodeEscaper</code> that escapes every codepoint from
     * <code>0</code> up to <code>Integer.MAX_VALUE</code>.</p>
     */
    public UnicodeEscaper() {
        this(0, Integer.MAX_VALUE, true);
    }

    /**
     * <p>Constructs a <code>UnicodeEscaper</code> for the specified range. Every other
     * constructor and static factory of this class ends up here. When
     * <code>between</code> is <code>true</code> the boundaries themselves are escaped
     * (inclusive); when it is <code>false</code> the boundaries themselves are not
     * escaped (exclusive).</p>
     *
     * @param below int value representing the lowest codepoint boundary
     * @param above int value representing the highest codepoint boundary
     * @param between whether to escape between the boundaries or outside them
     */
    protected UnicodeEscaper(final int below, final int above, final boolean between) {
        this.below = below;
        this.above = above;
        this.between = between;
    }

    /**
     * <p>Creates a <code>UnicodeEscaper</code> that escapes codepoints lower than the
     * specified value (exclusive). The upper boundary is fixed at
     * <code>Integer.MAX_VALUE</code>.</p>
     *
     * @param codepoint below which to escape
     * @return the newly created {@code UnicodeEscaper} instance
     */
    public static UnicodeEscaper below(final int codepoint) {
        return new UnicodeEscaper(codepoint, Integer.MAX_VALUE, false);
    }

    /**
     * <p>Creates a <code>UnicodeEscaper</code> that escapes codepoints higher than the
     * specified value (exclusive). The lower boundary is fixed at <code>0</code>.</p>
     *
     * @param codepoint above which to escape
     * @return the newly created {@code UnicodeEscaper} instance
     */
    public static UnicodeEscaper above(final int codepoint) {
        return new UnicodeEscaper(0, codepoint, false);
    }

    /**
     * <p>Creates a <code>UnicodeEscaper</code> that escapes codepoints lying outside of
     * the specified values (exclusive).</p>
     *
     * @param codepointLow below which to escape
     * @param codepointHigh above which to escape
     * @return the newly created {@code UnicodeEscaper} instance
     */
    public static UnicodeEscaper outsideOf(final int codepointLow, final int codepointHigh) {
        return new UnicodeEscaper(codepointLow, codepointHigh, false);
    }

    /**
     * <p>Creates a <code>UnicodeEscaper</code> that escapes codepoints lying between the
     * specified values (inclusive).</p>
     *
     * @param codepointLow lowest codepoint to escape
     * @param codepointHigh highest codepoint to escape
     * @return the newly created {@code UnicodeEscaper} instance
     */
    public static UnicodeEscaper between(final int codepointLow, final int codepointHigh) {
        return new UnicodeEscaper(codepointLow, codepointHigh, true);
    }

    /**
     * Writes the escaped form of {@code codepoint} to {@code out} when the codepoint
     * is selected by the configured range, and writes nothing otherwise.
     *
     * @param codepoint the codepoint to examine
     * @param out the writer that receives the escape
     * @return {@code true} if an escape was written, {@code false} if the codepoint
     *         was not selected
     * @throws IOException if writing to {@code out} fails
     */
    @Override
    public boolean translate(final int codepoint, final Writer out) throws IOException {
        // The codepoint is selected only when "is it inside the range?" agrees with
        // the configured mode (inside for between, outside otherwise).
        final boolean insideRange = codepoint >= below && codepoint <= above;
        if (insideRange != between) {
            return false;
        }

        // TODO: Handle potential + sign per various Unicode escape implementations
        final String escaped;
        if (codepoint > 0xffff) {
            // Values above 0xffff are delegated to the overridable hook.
            escaped = toUtf16Escape(codepoint);
        } else {
            // Pick the prefix whose zero padding matches the magnitude of the value;
            // presumably hex(int) yields no leading zeros, giving four digits in total.
            final String prefix = codepoint > 0xfff ? "\\u"
                    : codepoint > 0xff ? "\\u0"
                    : codepoint > 0xf ? "\\u00"
                    : "\\u000";
            escaped = prefix + hex(codepoint);
        }
        out.write(escaped);
        return true;
    }

    /**
     * Builds the escape used by {@link #translate(int, Writer)} for codepoints above
     * {@code 0xffff}: a backslash and the letter {@code u}, followed by the result of
     * {@code hex(codepoint)}. Subclasses may override this to emit a different form.
     *
     * @param codepoint
     *            a Unicode code point
     * @return the escape string for the given codepoint
     *
     * @since 3.2
     */
    protected String toUtf16Escape(final int codepoint) {
        final String digits = hex(codepoint);
        return "\\u" + digits;
    }
}