001 /*
002 * $Header: /home/jerenkrantz/tmp/commons/commons-convert/cvs/home/cvs/jakarta-commons-sandbox//xmlio/src/java/org/apache/commons/xmlio/out/XMLEncode.java,v 1.1 2004/10/08 11:56:20 ozeigermann Exp $
003 * $Revision: 155476 $
004 * $Date: 2005-02-26 13:31:24 +0000 (Sat, 26 Feb 2005) $
005 *
006 * ====================================================================
007 *
008 * Copyright 2004 The Apache Software Foundation
009 *
010 * Licensed under the Apache License, Version 2.0 (the "License");
011 * you may not use this file except in compliance with the License.
012 * You may obtain a copy of the License at
013 *
014 * http://www.apache.org/licenses/LICENSE-2.0
015 *
016 * Unless required by applicable law or agreed to in writing, software
017 * distributed under the License is distributed on an "AS IS" BASIS,
018 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
019 * See the License for the specific language governing permissions and
020 * limitations under the License.
021 *
022 */
023
024 package org.apache.commons.xmlio.out;
025
/**
 * Collection of XML encoding/decoding helpers. <br>
 * This is all about the special characters {@code &}, {@code <} and {@code >},
 * and, for attributes, additionally {@code "} and {@code '}. These must be
 * encoded when text is written to XML and decoded when it is read back.
 */
public final class XMLEncode {

    /** Opening delimiter of a CDATA section. */
    private static final String CDATA_START = "<![CDATA[";

    /** Closing delimiter of a CDATA section; must never appear unescaped in content. */
    private static final String CDATA_END = "]]>";

    /**
     * Texts no longer than this are never wrapped in a CDATA section by
     * {@link #xmlEncodeText(String)}; 12 is the combined length of the
     * CDATA opening and closing delimiters.
     */
    private static final int CDATA_BLOCK_THRESHOLD_LENGTH = 12;

    /** Quote character used when the caller does not specify one. */
    private static final char DEFAULT_QUOTE_CHAR = '"';

    /** Characters that always require escaping in element content. */
    private static final char[] CONTENT_SPECIAL_CHARS = { '&', '<' };

    /** Characters that require escaping when text is stored in an attribute value. */
    private static final char[] ATTRIBUTE_SPECIAL_CHARS = { '&', '<', '"', '\'' };

    /** The predefined entity references understood by {@link #xmlDecodeTextToCDATA(String)}. */
    private static final String[] ENTITY_REFS = { "&amp;", "&lt;", "&gt;", "&quot;", "&apos;" };

    /** The character each entry of {@link #ENTITY_REFS} stands for (same index). */
    private static final char[] ENTITY_CHARS = { '&', '<', '>', '"', '\'' };

    /**
     * Checks if this text purely consists of white space characters as
     * defined by {@link Character#isWhitespace(char)}.
     *
     * @param text the text to inspect; must not be <code>null</code>
     * @return <code>true</code> if every character is white space (this
     *         includes the empty string), <code>false</code> otherwise
     * @throws NullPointerException if <code>text</code> is <code>null</code>
     */
    public static boolean isWhiteSpace(String text) {
        for (int i = 0; i < text.length(); i++) {
            if (!Character.isWhitespace(text.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Makes any text fit into XML attributes. The result is escaped and
     * already enclosed in <code>quoteChar</code>.
     *
     * @param text the raw attribute value, may be <code>null</code>
     * @param quoteChar the character put before and after the encoded value
     * @return the quoted, encoded value or <code>null</code> if
     *         <code>text</code> is <code>null</code>
     */
    public static String xmlEncodeTextForAttribute(String text, char quoteChar) {
        if (text == null) {
            return null;
        }
        return xmlEncodeTextAsPCDATA(text, true, quoteChar);
    }

    /**
     * Encodes text as XML in the most suitable way, either CDATA block or PCDATA.
     * Text that needs no encoding is returned unchanged. Text that needs
     * encoding is wrapped in a CDATA section when it is longer than the
     * CDATA delimiters themselves and does not contain <code>]]&gt;</code>;
     * in every other case it is escaped as PCDATA.
     *
     * @param text the raw text, may be <code>null</code>
     * @return the encoded text or <code>null</code> if <code>text</code> is
     *         <code>null</code>
     */
    public static String xmlEncodeText(String text) {
        if (text == null) {
            return null;
        }
        if (!needsEncoding(text)) {
            return text;
        }
        // Only use CDATA if the text is longer than the CDATA block overhead.
        if (text.length() > CDATA_BLOCK_THRESHOLD_LENGTH) {
            String cdata = xmlEncodeTextAsCDATABlock(text);
            if (cdata != null) {
                return cdata;
            }
        }
        // If everything else fails, do it the safe way.
        return xmlEncodeTextAsPCDATA(text);
    }

    /**
     * Encodes any text as PCDATA for use in element content.
     *
     * @param text the raw text, may be <code>null</code>
     * @return the escaped text or <code>null</code> if <code>text</code> is
     *         <code>null</code>
     */
    public static String xmlEncodeTextAsPCDATA(String text) {
        if (text == null) {
            return null;
        }
        return xmlEncodeTextAsPCDATA(text, false);
    }

    /**
     * Encodes any text as PCDATA, using the double quote as quote character
     * when encoding for an attribute.
     *
     * @param text the raw text, may be <code>null</code>
     * @param forAttribute if you want quotes and apostrophes specially
     *        treated for attributes; the result is then also enclosed in quotes
     * @return the escaped text or <code>null</code> if <code>text</code> is
     *         <code>null</code>
     */
    public static String xmlEncodeTextAsPCDATA(String text, boolean forAttribute) {
        return xmlEncodeTextAsPCDATA(text, forAttribute, DEFAULT_QUOTE_CHAR);
    }

    /**
     * Encodes any text as PCDATA. The characters <code>&amp;</code>,
     * <code>&lt;</code> and <code>&gt;</code> are always replaced by their
     * predefined entity references; quotes and apostrophes are replaced only
     * when encoding for an attribute.
     *
     * @param text the raw text, may be <code>null</code>
     * @param forAttribute if you want quotes and apostrophes specially
     *        treated for attributes; the result is then also enclosed in
     *        <code>quoteChar</code>
     * @param quoteChar if this is for attributes this <code>char</code> is
     *        used to quote the attribute value
     * @return the escaped text or <code>null</code> if <code>text</code> is
     *         <code>null</code>
     */
    public static String xmlEncodeTextAsPCDATA(String text, boolean forAttribute, char quoteChar) {
        if (text == null) {
            return null;
        }
        int length = text.length();
        StringBuffer n = new StringBuffer(length * 2 + 2);
        if (forAttribute) {
            n.append(quoteChar);
        }
        for (int i = 0; i < length; i++) {
            char c = text.charAt(i);
            switch (c) {
                case '&' :
                    n.append("&amp;");
                    break;
                case '<' :
                    n.append("&lt;");
                    break;
                case '>' : // FIX for sourceforge bug #802520 ("]]>" needs encoding)
                    n.append("&gt;");
                    break;
                case '"' :
                    if (forAttribute) {
                        n.append("&quot;");
                    } else {
                        n.append(c);
                    }
                    break;
                case '\'' :
                    if (forAttribute) {
                        n.append("&apos;");
                    } else {
                        n.append(c);
                    }
                    break;
                default :
                    n.append(c);
                    break;
            }
        }
        if (forAttribute) {
            n.append(quoteChar);
        }
        return n.toString();
    }

    /**
     * Returns string as CDATA block if possible, otherwise null.
     *
     * @param text the raw text, may be <code>null</code>
     * @return the text wrapped in a CDATA section, or <code>null</code> if
     *         <code>text</code> is <code>null</code> or contains
     *         <code>]]&gt;</code>
     */
    public static String xmlEncodeTextAsCDATABlock(String text) {
        if (text == null) {
            return null;
        }
        if (!isCompatibleWithCDATABlock(text)) {
            return null;
        }
        return CDATA_START + text + CDATA_END;
    }

    /**
     * Checks if this text needs encoding in order to be represented in XML
     * element content.
     *
     * @param text the raw text, may be <code>null</code>
     * @return <code>true</code> if the text must be encoded
     */
    public static boolean needsEncoding(String text) {
        return needsEncoding(text, false);
    }

    /**
     * Checks if this text needs encoding in order to be represented in XML.
     *
     * Set <code>checkForAttr</code> if you want to check for storability in
     * an attribute. For element content the sequence <code>]]&gt;</code> also
     * counts as needing encoding, because XML does not allow it to appear
     * literally in character data.
     *
     * @param data the raw text, may be <code>null</code>
     * @param checkForAttr <code>true</code> to check against the characters
     *        special in attribute values, <code>false</code> to check for
     *        element content
     * @return <code>true</code> if the text must be encoded,
     *         <code>false</code> otherwise or if <code>data</code> is
     *         <code>null</code>
     */
    public static boolean needsEncoding(String data, boolean checkForAttr) {
        if (data == null) {
            return false;
        }
        if (checkForAttr) {
            return contains(data, ATTRIBUTE_SPECIAL_CHARS);
        }
        return contains(data, CONTENT_SPECIAL_CHARS) || data.indexOf(CDATA_END) != -1;
    }

    /**
     * Can this text be stored into a CDATA block?
     *
     * @param text the raw text, may be <code>null</code>
     * @return <code>true</code> if the text is not <code>null</code> and
     *         does not contain <code>]]&gt;</code>
     */
    public static boolean isCompatibleWithCDATABlock(String text) {
        if (text == null) {
            return false;
        }
        return text.indexOf(CDATA_END) == -1;
    }

    /**
     * Make CDATA out of possibly encoded PCDATA. <br>
     * E.g. make '&amp;' out of '&amp;amp;'. Only the five predefined entity
     * references (amp, lt, gt, quot, apos) are replaced; any other
     * ampersand is copied unchanged. Decoded output is not scanned again.
     *
     * @param pcdata the encoded text, may be <code>null</code>
     * @return the decoded text or <code>null</code> if <code>pcdata</code>
     *         is <code>null</code>
     */
    public static String xmlDecodeTextToCDATA(String pcdata) {
        if (pcdata == null) {
            return null;
        }
        int length = pcdata.length();
        StringBuffer n = new StringBuffer(length);
        int i = 0;
        while (i < length) {
            char c = pcdata.charAt(i);
            if (c == '&') {
                int entity = indexOfEntityAt(pcdata, i);
                if (entity >= 0) {
                    n.append(ENTITY_CHARS[entity]);
                    // Skip the whole reference, including the trailing ';'.
                    i += ENTITY_REFS[entity].length();
                    continue;
                }
            }
            n.append(c);
            i++;
        }
        return n.toString();
    }

    /**
     * Finds which predefined entity reference, if any, starts at the given
     * position.
     *
     * @return the index into {@link #ENTITY_REFS}, or -1 if none matches
     */
    private static int indexOfEntityAt(String data, int offset) {
        for (int k = 0; k < ENTITY_REFS.length; k++) {
            if (data.startsWith(ENTITY_REFS[k], offset)) {
                return k;
            }
        }
        return -1;
    }

    /**
     * Tells whether the text contains at least one of the given characters.
     * Combines multiple checks in one pass over the text.
     *
     * @return <code>false</code> if either argument is <code>null</code> or
     *         <code>chars</code> is empty
     */
    private static boolean contains(String text, char[] chars) {
        if (text == null || chars == null || chars.length == 0) {
            return false;
        }
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            for (int j = 0; j < chars.length; j++) {
                if (chars[j] == c) {
                    return true;
                }
            }
        }
        return false;
    }

}