View Javadoc

1   /*
2    * $Header: /home/jerenkrantz/tmp/commons/commons-convert/cvs/home/cvs/jakarta-commons-sandbox//xmlio/src/java/org/apache/commons/xmlio/out/XMLEncode.java,v 1.1 2004/10/08 11:56:20 ozeigermann Exp $
3    * $Revision: 155476 $
4    * $Date: 2005-02-26 13:31:24 +0000 (Sat, 26 Feb 2005) $
5    *
6    * ====================================================================
7    *
8    * Copyright 2004 The Apache Software Foundation 
9    *
10   * Licensed under the Apache License, Version 2.0 (the "License");
11   * you may not use this file except in compliance with the License.
12   * You may obtain a copy of the License at
13   *
14   *     http://www.apache.org/licenses/LICENSE-2.0
15   *
16   * Unless required by applicable law or agreed to in writing, software
17   * distributed under the License is distributed on an "AS IS" BASIS,
18   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19   * See the License for the specific language governing permissions and
20   * limitations under the License.
21   *
22   */
23  
24  package org.apache.commons.xmlio.out;
25  
/**
 * Collection of XML encoding/decoding helpers. <br>
 * This is all about the special characters &amp;, &lt; and &gt;, and for attributes
 * &quot; and &apos;. These must be encoded/decoded from/to XML.
 * <p>
 * All methods are static and keep no state. Only the five predefined entity
 * references (<code>&amp;amp;</code>, <code>&amp;lt;</code>, <code>&amp;gt;</code>,
 * <code>&amp;quot;</code>, <code>&amp;apos;</code>) are produced or decoded;
 * numeric character references are passed through untouched.
 */
public final class XMLEncode {

    /**
     * Texts must be longer than this to be wrapped into a CDATA block; it equals
     * the combined length of the CDATA start and end markers.
     */
    private final static int CDATA_BLOCK_THRESHOLD_LENGTH = 12;

    /** Quote character used for attribute values when the caller does not supply one. */
    private final static char DEFAULT_QUOTE_CHAR = '"';

    /** Sequence that terminates a CDATA block and must not appear literally in character data. */
    private final static String CDATA_END = "]]>";

    /**
     * Checks if this text purely consists of white space characters as defined
     * by {@link Character#isWhitespace(char)}.
     *
     * @param text the text to check, must not be <code>null</code>
     * @return <code>true</code> if every character is white space (also for the
     *         empty string), <code>false</code> otherwise
     * @throws NullPointerException if <code>text</code> is <code>null</code>
     */
    public final static boolean isWhiteSpace(String text) {
        for (int i = 0; i < text.length(); i++) {
            if (!Character.isWhitespace(text.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Makes any text fit into XML attributes.
     *
     * @param text the raw attribute value, may be <code>null</code>
     * @param quoteChar the character put before and after the encoded value
     * @return the encoded value surrounded by <code>quoteChar</code>, or
     *         <code>null</code> if <code>text</code> is <code>null</code>
     */
    public final static String xmlEncodeTextForAttribute(String text, char quoteChar) {
        if (text == null) {
            return null;
        }
        return xmlEncodeTextAsPCDATA(text, true, quoteChar);
    }

    /**
     * Encodes text as XML in the most suitable way, either CDATA block or PCDATA.
     * Text that needs no encoding is returned unchanged.
     *
     * @param text the raw text, may be <code>null</code>
     * @return the encoded text, or <code>null</code> if <code>text</code> is <code>null</code>
     */
    public final static String xmlEncodeText(String text) {
        if (text == null) {
            return null;
        }
        if (!needsEncoding(text)) {
            return text;
        }
        // only encode as CDATA if the text is longer than the CDATA block overhead
        if (text.length() > CDATA_BLOCK_THRESHOLD_LENGTH) {
            String cdata = xmlEncodeTextAsCDATABlock(text);
            if (cdata != null) {
                return cdata;
            }
        }
        // if everything else fails, do it the safe way...
        return xmlEncodeTextAsPCDATA(text);
    }

    /**
     * Encodes any text as PCDATA (element content, no surrounding quotes).
     *
     * @param text the raw text, may be <code>null</code>
     * @return the encoded text, or <code>null</code> if <code>text</code> is <code>null</code>
     */
    public final static String xmlEncodeTextAsPCDATA(String text) {
        if (text == null) {
            return null;
        }
        return xmlEncodeTextAsPCDATA(text, false);
    }

    /**
     * Encodes any text as PCDATA, using the double quote as quote character
     * when encoding for an attribute.
     *
     * @param text the raw text, may be <code>null</code>
     * @param forAttribute if you want quotes and apostrophes specially treated
     *        for attributes
     * @return the encoded text, or <code>null</code> if <code>text</code> is <code>null</code>
     */
    public final static String xmlEncodeTextAsPCDATA(String text, boolean forAttribute) {
        return xmlEncodeTextAsPCDATA(text, forAttribute, DEFAULT_QUOTE_CHAR);
    }

    /**
     * Encodes any text as PCDATA.
     * <p>
     * <code>&amp;</code>, <code>&lt;</code> and <code>&gt;</code> are always replaced
     * by their entity references. When <code>forAttribute</code> is set, both
     * <code>"</code> and <code>'</code> are replaced as well (independent of
     * <code>quoteChar</code>) and the result is wrapped in <code>quoteChar</code>.
     *
     * @param text the raw text, may be <code>null</code>
     * @param forAttribute if you want quotes and apostrophes specially treated
     *        for attributes
     * @param quoteChar if this is for attributes this <code>char</code> is used
     *        to quote the attribute value
     * @return the encoded text, or <code>null</code> if <code>text</code> is <code>null</code>
     */
    public final static String xmlEncodeTextAsPCDATA(String text, boolean forAttribute, char quoteChar) {
        if (text == null) {
            return null;
        }
        StringBuffer n = new StringBuffer(text.length() * 2 + 2);
        if (forAttribute) {
            // opening quote goes in first so the buffer never has to be shifted
            n.append(quoteChar);
        }
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '&' :
                    n.append("&amp;");
                    break;
                case '<' :
                    n.append("&lt;");
                    break;
                case '>' : // FIX for sourceforge bug #802520 ("]]>" needs encoding)
                    n.append("&gt;");
                    break;
                case '"' :
                    if (forAttribute) {
                        n.append("&quot;");
                    } else {
                        n.append(c);
                    }
                    break;
                case '\'' :
                    if (forAttribute) {
                        n.append("&apos;");
                    } else {
                        n.append(c);
                    }
                    break;
                default :
                    n.append(c);
                    break;
            }
        }
        if (forAttribute) {
            n.append(quoteChar);
        }
        return n.toString();
    }

    /**
     * Returns string as CDATA block if possible, otherwise null.
     *
     * @param text the raw text, may be <code>null</code>
     * @return the text wrapped into a CDATA block, or <code>null</code> if
     *         <code>text</code> is <code>null</code> or contains the CDATA end marker
     */
    public final static String xmlEncodeTextAsCDATABlock(String text) {
        if (text == null) {
            return null;
        }
        if (!isCompatibleWithCDATABlock(text)) {
            return null;
        }
        return "<![CDATA[" + text + CDATA_END;
    }

    /**
     * Checks if this text needs encoding in order to be represented in XML
     * element content.
     *
     * @param text the raw text, may be <code>null</code>
     * @return <code>true</code> if the text must be encoded, <code>false</code>
     *         otherwise (also for <code>null</code>)
     */
    public final static boolean needsEncoding(String text) {
        return needsEncoding(text, false);
    }

    /**
     * Checks if this text needs encoding in order to be represented in XML.
     * <p>
     * Encoding is needed if the text contains <code>&amp;</code>, <code>&lt;</code>
     * or the sequence <code>]]&gt;</code>, which must not appear literally in
     * character data. A single <code>&gt;</code> outside that sequence does not
     * require encoding.
     * <p>
     * Set <code>checkForAttr</code> if you want to check for storability in
     * an attribute; quotes and apostrophes then require encoding, too.
     *
     * @param data the raw text, may be <code>null</code>
     * @param checkForAttr whether <code>"</code> and <code>'</code> count as
     *        characters that need encoding
     * @return <code>true</code> if the text must be encoded, <code>false</code>
     *         otherwise (also for <code>null</code>)
     */
    public final static boolean needsEncoding(String data, boolean checkForAttr) {
        if (data == null) {
            return false;
        }
        for (int i = 0; i < data.length(); i++) {
            char c = data.charAt(i);
            if (c == '&' || c == '<') {
                return true;
            }
            if (checkForAttr && (c == '"' || c == '\'')) {
                return true;
            }
        }
        // "]]>" is illegal in character data even though '>' alone is fine
        return data.indexOf(CDATA_END) != -1;
    }

    /**
     * Can this text be stored into a CDATA block?
     *
     * @param text the raw text, may be <code>null</code>
     * @return <code>true</code> if the text is not <code>null</code> and does
     *         not contain the CDATA end marker
     */
    public final static boolean isCompatibleWithCDATABlock(String text) {
        if (text == null) {
            return false;
        }
        return text.indexOf(CDATA_END) == -1;
    }

    /**
     * Make CDATA out of possibly encoded PCDATA. <br>
     * E.g. make '&amp;' out of '&amp;amp;'
     * <p>
     * Only the five predefined entity references are decoded; any other
     * ampersand is copied to the result unchanged.
     *
     * @param pcdata the encoded text, may be <code>null</code>
     * @return the decoded text, or <code>null</code> if <code>pcdata</code> is <code>null</code>
     */
    public final static String xmlDecodeTextToCDATA(String pcdata) {
        if (pcdata == null) {
            return null;
        }
        StringBuffer n = new StringBuffer(pcdata.length());
        for (int i = 0; i < pcdata.length(); i++) {
            char c = pcdata.charAt(i);
            if (c != '&') {
                n.append(c);
                continue;
            }
            // startsWith() is bounds-safe, so entities cut off at the end of
            // the string simply do not match; i is advanced past the entity
            // name and the loop increment then skips the '&' position itself
            int next = i + 1;
            if (pcdata.startsWith("amp;", next)) {
                n.append('&');
                i += 4;
            } else if (pcdata.startsWith("lt;", next)) {
                n.append('<');
                i += 3;
            } else if (pcdata.startsWith("gt;", next)) {
                n.append('>');
                i += 3;
            } else if (pcdata.startsWith("quot;", next)) {
                n.append('"');
                i += 5;
            } else if (pcdata.startsWith("apos;", next)) {
                n.append('\'');
                i += 5;
            } else {
                n.append('&');
            }
        }
        return n.toString();
    }

    /**
     * Checks whether the text contains at least one of the given characters;
     * combines multiple checks in one method for speed.
     * Currently not called from within this class.
     *
     * @return <code>false</code> if either argument is <code>null</code> or
     *         <code>chars</code> is empty
     */
    private final static boolean contains(String text, char[] chars) {
        if (text == null || chars == null || chars.length == 0) {
            return false;
        }
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            for (int j = 0; j < chars.length; j++) {
                if (chars[j] == c) {
                    return true;
                }
            }
        }
        return false;
    }

}