001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.lang3.text.translate;
018
019 import java.io.IOException;
020 import java.io.Writer;
021
022 /**
023 * Translates codepoints to their Unicode escaped value.
024 *
025 * @since 3.0
026 * @version $Id: UnicodeEscaper.java 1148520 2011-07-19 20:53:23Z ggregory $
027 */
028 public class UnicodeEscaper extends CodePointTranslator {
029
030 private final int below;
031 private final int above;
032 private final boolean between;
033
034 /**
035 * <p>Constructs a <code>UnicodeEscaper</code> for all characters. </p>
036 */
037 public UnicodeEscaper(){
038 this(0, Integer.MAX_VALUE, true);
039 }
040
041 /**
042 * <p>Constructs a <code>UnicodeEscaper</code> for the specified range. This is
043 * the underlying method for the other constructors/builders. The <code>below</code>
044 * and <code>above</code> boundaries are inclusive when <code>between</code> is
045 * <code>true</code> and exclusive when it is <code>false</code>. </p>
046 *
047 * @param below int value representing the lowest codepoint boundary
048 * @param above int value representing the highest codepoint boundary
049 * @param between whether to escape between the boundaries or outside them
050 */
051 private UnicodeEscaper(int below, int above, boolean between) {
052 this.below = below;
053 this.above = above;
054 this.between = between;
055 }
056
057 /**
058 * <p>Constructs a <code>UnicodeEscaper</code> below the specified value (exclusive). </p>
059 *
060 * @param codepoint below which to escape
061 * @return the newly created {@code UnicodeEscaper} instance
062 */
063 public static UnicodeEscaper below(int codepoint) {
064 return outsideOf(codepoint, Integer.MAX_VALUE);
065 }
066
067 /**
068 * <p>Constructs a <code>UnicodeEscaper</code> above the specified value (exclusive). </p>
069 *
070 * @param codepoint above which to escape
071 * @return the newly created {@code UnicodeEscaper} instance
072 */
073 public static UnicodeEscaper above(int codepoint) {
074 return outsideOf(0, codepoint);
075 }
076
077 /**
078 * <p>Constructs a <code>UnicodeEscaper</code> outside of the specified values (exclusive). </p>
079 *
080 * @param codepointLow below which to escape
081 * @param codepointHigh above which to escape
082 * @return the newly created {@code UnicodeEscaper} instance
083 */
084 public static UnicodeEscaper outsideOf(int codepointLow, int codepointHigh) {
085 return new UnicodeEscaper(codepointLow, codepointHigh, false);
086 }
087
088 /**
089 * <p>Constructs a <code>UnicodeEscaper</code> between the specified values (inclusive). </p>
090 *
091 * @param codepointLow above which to escape
092 * @param codepointHigh below which to escape
093 * @return the newly created {@code UnicodeEscaper} instance
094 */
095 public static UnicodeEscaper between(int codepointLow, int codepointHigh) {
096 return new UnicodeEscaper(codepointLow, codepointHigh, true);
097 }
098
099 /**
100 * {@inheritDoc}
101 */
102 @Override
103 public boolean translate(int codepoint, Writer out) throws IOException {
104 if(between) {
105 if (codepoint < below || codepoint > above) {
106 return false;
107 }
108 } else {
109 if (codepoint >= below && codepoint <= above) {
110 return false;
111 }
112 }
113
114 // TODO: Handle potential + sign per various Unicode escape implementations
115 if (codepoint > 0xffff) {
116 // TODO: Figure out what to do. Output as two Unicodes?
117 // Does this make this a Java-specific output class?
118 out.write("\\u" + hex(codepoint));
119 } else if (codepoint > 0xfff) {
120 out.write("\\u" + hex(codepoint));
121 } else if (codepoint > 0xff) {
122 out.write("\\u0" + hex(codepoint));
123 } else if (codepoint > 0xf) {
124 out.write("\\u00" + hex(codepoint));
125 } else {
126 out.write("\\u000" + hex(codepoint));
127 }
128 return true;
129 }
130 }