001 /* 002 * $Header: /home/jerenkrantz/tmp/commons/commons-convert/cvs/home/cvs/jakarta-commons-sandbox//xmlio/src/java/org/apache/commons/xmlio/out/XMLEncode.java,v 1.1 2004/10/08 11:56:20 ozeigermann Exp $ 003 * $Revision: 155476 $ 004 * $Date: 2005-02-26 13:31:24 +0000 (Sat, 26 Feb 2005) $ 005 * 006 * ==================================================================== 007 * 008 * Copyright 2004 The Apache Software Foundation 009 * 010 * Licensed under the Apache License, Version 2.0 (the "License"); 011 * you may not use this file except in compliance with the License. 012 * You may obtain a copy of the License at 013 * 014 * http://www.apache.org/licenses/LICENSE-2.0 015 * 016 * Unless required by applicable law or agreed to in writing, software 017 * distributed under the License is distributed on an "AS IS" BASIS, 018 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 019 * See the License for the specific language governing permissions and 020 * limitations under the License. 021 * 022 */ 023 024 package org.apache.commons.xmlio.out; 025 026 /** 027 * Collection of XML encoding/decoding helpers. <br> 028 * This is all about the special characters & and <, and for attributes 029 * " and '. These must be encoded/decoded from/to XML. 030 * 031 */ 032 public final class XMLEncode { 033 034 private final static int CDATA_BLOCK_THRESHOLD_LENGTH = 12; 035 private final static char DEFAULT_QUOTE_CHAR = '"'; 036 037 /** Checks if this text purely consists of the white space characters 038 * ' ', TAB, NEWLINE. 039 */ 040 public final static boolean isWhiteSpace(String text) { 041 for (int i = 0; i < text.length(); i++) { 042 char c = text.charAt(i); 043 if (Character.isWhitespace(c)) { 044 continue; 045 } else { 046 return false; 047 } 048 } 049 return true; 050 } 051 052 /** Makes any text fit into XML attributes. */ 053 public final static String xmlEncodeTextForAttribute(String text, char quoteChar) { 054 if (text == null) 055 return null; 056 return xmlEncodeTextAsPCDATA(text, true, quoteChar); 057 } 058 059 /** Encodes text as XML in the most suitable way, either CDATA block or PCDATA. */ 060 public final static String xmlEncodeText(String text) { 061 if (text == null) 062 return null; 063 if (!needsEncoding(text)) { 064 return text; 065 } else { 066 // only encode as cdata if is is longer than CDATA block overhead: 067 if (text.length() > CDATA_BLOCK_THRESHOLD_LENGTH) { 068 String cdata = xmlEncodeTextAsCDATABlock(text); 069 if (cdata != null) { 070 return cdata; 071 } 072 } 073 } 074 // if every thing else fails, do it the save way... 075 return xmlEncodeTextAsPCDATA(text); 076 } 077 078 /** Encodes any text as PCDATA. */ 079 public final static String xmlEncodeTextAsPCDATA(String text) { 080 if (text == null) 081 return null; 082 return xmlEncodeTextAsPCDATA(text, false); 083 } 084 085 /** Encodes any text as PCDATA. 086 * @param forAttribute if you want 087 * quotes and apostrophes specially treated for attributes 088 */ 089 public final static String xmlEncodeTextAsPCDATA(String text, boolean forAttribute) { 090 return xmlEncodeTextAsPCDATA(text, forAttribute, DEFAULT_QUOTE_CHAR); 091 } 092 093 /** Encodes any text as PCDATA. 094 * @param forAttribute if you want 095 * quotes and apostrophes specially treated for attributes 096 * @param quoteChar if this is for attributes this <code>char</code> is used to quote the attribute value 097 */ 098 public final static String xmlEncodeTextAsPCDATA(String text, boolean forAttribute, char quoteChar) { 099 if (text == null) 100 return null; 101 char c; 102 StringBuffer n = new StringBuffer(text.length() * 2); 103 for (int i = 0; i < text.length(); i++) { 104 c = text.charAt(i); 105 switch (c) { 106 case '&' : 107 n.append("&"); 108 break; 109 case '<' : 110 n.append("<"); 111 break; 112 case '>' : // FIX for sourceforge bug #802520 ("]]>" needs encoding) 113 n.append(">"); 114 break; 115 case '"' : 116 if (forAttribute) 117 n.append("""); 118 else 119 n.append(c); 120 break; 121 case '\'' : 122 if (forAttribute) 123 n.append("'"); 124 else 125 n.append(c); 126 break; 127 default : 128 { 129 n.append(c); 130 break; 131 } 132 } 133 } 134 135 if (forAttribute) { 136 n.append(quoteChar); 137 n.insert(0, quoteChar); 138 } 139 140 return n.toString(); 141 } 142 143 /** Returns string as CDATA block if possible, otherwise null. */ 144 public final static String xmlEncodeTextAsCDATABlock(String text) { 145 if (text == null) 146 return null; 147 if (isCompatibleWithCDATABlock(text)) { 148 return "<![CDATA[" + text + "]]>"; 149 } else { 150 return null; 151 } 152 } 153 154 /** Checks if this text needs encoding in order to be represented in XML. */ 155 public final static boolean needsEncoding(String text) { 156 return needsEncoding(text, false); 157 } 158 159 /** Checks if this text needs encoding in order to be represented in XML. 160 * 161 * Set <code>checkForAttr</code> if you want to check for storability in 162 * an attribute. 163 */ 164 public final static boolean needsEncoding(String data, boolean checkForAttr) { 165 if (data == null) 166 return false; 167 char c; 168 for (int i = 0; i < data.length(); i++) { 169 c = data.charAt(i); 170 if (c == '&' || c == '<' || (checkForAttr && (c == '"' || c == '\''))) 171 return true; 172 } 173 return false; 174 } 175 176 /** Can this text be stored into a CDATA block? */ 177 public final static boolean isCompatibleWithCDATABlock(String text) { 178 if (text == null) 179 return false; 180 return (text.indexOf("]]>") == -1); 181 } 182 183 /** Make CDATA out of possibly encoded PCDATA. <br> 184 * E.g. make '&' out of '&amp;' 185 */ 186 public final static String xmlDecodeTextToCDATA(String pcdata) { 187 if (pcdata == null) 188 return null; 189 char c, c1, c2, c3, c4, c5; 190 StringBuffer n = new StringBuffer(pcdata.length()); 191 for (int i = 0; i < pcdata.length(); i++) { 192 c = pcdata.charAt(i); 193 if (c == '&') { 194 c1 = lookAhead(1, i, pcdata); 195 c2 = lookAhead(2, i, pcdata); 196 c3 = lookAhead(3, i, pcdata); 197 c4 = lookAhead(4, i, pcdata); 198 c5 = lookAhead(5, i, pcdata); 199 200 if (c1 == 'a' && c2 == 'm' && c3 == 'p' && c4 == ';') { 201 n.append("&"); 202 i += 4; 203 } else if (c1 == 'l' && c2 == 't' && c3 == ';') { 204 n.append("<"); 205 i += 3; 206 } else if (c1 == 'g' && c2 == 't' && c3 == ';') { 207 n.append(">"); 208 i += 3; 209 } else if (c1 == 'q' && c2 == 'u' && c3 == 'o' && c4 == 't' && c5 == ';') { 210 n.append("\""); 211 i += 5; 212 } else if (c1 == 'a' && c2 == 'p' && c3 == 'o' && c4 == 's' && c5 == ';') { 213 n.append("'"); 214 i += 5; 215 } else 216 n.append("&"); 217 } else 218 n.append(c); 219 } 220 return n.toString(); 221 } 222 223 private final static char lookAhead(int la, int offset, String data) { 224 try { 225 return data.charAt(offset + la); 226 } catch (StringIndexOutOfBoundsException e) { 227 return 0x0; 228 } 229 } 230 231 // combine multiple checks in one methods for speed 232 private final static boolean contains(String text, char[] chars) { 233 if (text == null || chars == null || chars.length == 0) { 234 return false; 235 } 236 for (int i = 0; i < text.length(); i++) { 237 char c = text.charAt(i); 238 for (int j = 0; j < chars.length; j++) { 239 if (chars[j] == c) { 240 return true; 241 } 242 } 243 } 244 return false; 245 } 246 247 }