001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.lang3.text.translate; 018 019 import java.io.IOException; 020 import java.io.Writer; 021 022 /** 023 * Translates codepoints to their Unicode escaped value. 024 * 025 * @since 3.0 026 * @version $Id: UnicodeEscaper.java 1148520 2011-07-19 20:53:23Z ggregory $ 027 */ 028 public class UnicodeEscaper extends CodePointTranslator { 029 030 private final int below; 031 private final int above; 032 private final boolean between; 033 034 /** 035 * <p>Constructs a <code>UnicodeEscaper</code> for all characters. </p> 036 */ 037 public UnicodeEscaper(){ 038 this(0, Integer.MAX_VALUE, true); 039 } 040 041 /** 042 * <p>Constructs a <code>UnicodeEscaper</code> for the specified range. This is 043 * the underlying method for the other constructors/builders. The <code>below</code> 044 * and <code>above</code> boundaries are inclusive when <code>between</code> is 045 * <code>true</code> and exclusive when it is <code>false</code>. </p> 046 * 047 * @param below int value representing the lowest codepoint boundary 048 * @param above int value representing the highest codepoint boundary 049 * @param between whether to escape between the boundaries or outside them 050 */ 051 private UnicodeEscaper(int below, int above, boolean between) { 052 this.below = below; 053 this.above = above; 054 this.between = between; 055 } 056 057 /** 058 * <p>Constructs a <code>UnicodeEscaper</code> below the specified value (exclusive). </p> 059 * 060 * @param codepoint below which to escape 061 * @return the newly created {@code UnicodeEscaper} instance 062 */ 063 public static UnicodeEscaper below(int codepoint) { 064 return outsideOf(codepoint, Integer.MAX_VALUE); 065 } 066 067 /** 068 * <p>Constructs a <code>UnicodeEscaper</code> above the specified value (exclusive). </p> 069 * 070 * @param codepoint above which to escape 071 * @return the newly created {@code UnicodeEscaper} instance 072 */ 073 public static UnicodeEscaper above(int codepoint) { 074 return outsideOf(0, codepoint); 075 } 076 077 /** 078 * <p>Constructs a <code>UnicodeEscaper</code> outside of the specified values (exclusive). </p> 079 * 080 * @param codepointLow below which to escape 081 * @param codepointHigh above which to escape 082 * @return the newly created {@code UnicodeEscaper} instance 083 */ 084 public static UnicodeEscaper outsideOf(int codepointLow, int codepointHigh) { 085 return new UnicodeEscaper(codepointLow, codepointHigh, false); 086 } 087 088 /** 089 * <p>Constructs a <code>UnicodeEscaper</code> between the specified values (inclusive). </p> 090 * 091 * @param codepointLow above which to escape 092 * @param codepointHigh below which to escape 093 * @return the newly created {@code UnicodeEscaper} instance 094 */ 095 public static UnicodeEscaper between(int codepointLow, int codepointHigh) { 096 return new UnicodeEscaper(codepointLow, codepointHigh, true); 097 } 098 099 /** 100 * {@inheritDoc} 101 */ 102 @Override 103 public boolean translate(int codepoint, Writer out) throws IOException { 104 if(between) { 105 if (codepoint < below || codepoint > above) { 106 return false; 107 } 108 } else { 109 if (codepoint >= below && codepoint <= above) { 110 return false; 111 } 112 } 113 114 // TODO: Handle potential + sign per various Unicode escape implementations 115 if (codepoint > 0xffff) { 116 // TODO: Figure out what to do. Output as two Unicodes? 117 // Does this make this a Java-specific output class? 118 out.write("\\u" + hex(codepoint)); 119 } else if (codepoint > 0xfff) { 120 out.write("\\u" + hex(codepoint)); 121 } else if (codepoint > 0xff) { 122 out.write("\\u0" + hex(codepoint)); 123 } else if (codepoint > 0xf) { 124 out.write("\\u00" + hex(codepoint)); 125 } else { 126 out.write("\\u000" + hex(codepoint)); 127 } 128 return true; 129 } 130 }