001    /*
002     * $Header: /home/jerenkrantz/tmp/commons/commons-convert/cvs/home/cvs/jakarta-commons-sandbox//xmlio/src/java/org/apache/commons/xmlio/out/XMLEncode.java,v 1.1 2004/10/08 11:56:20 ozeigermann Exp $
003     * $Revision: 155476 $
004     * $Date: 2005-02-26 13:31:24 +0000 (Sat, 26 Feb 2005) $
005     *
006     * ====================================================================
007     *
008     * Copyright 2004 The Apache Software Foundation 
009     *
010     * Licensed under the Apache License, Version 2.0 (the "License");
011     * you may not use this file except in compliance with the License.
012     * You may obtain a copy of the License at
013     *
014     *     http://www.apache.org/licenses/LICENSE-2.0
015     *
016     * Unless required by applicable law or agreed to in writing, software
017     * distributed under the License is distributed on an "AS IS" BASIS,
018     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
019     * See the License for the specific language governing permissions and
020     * limitations under the License.
021     *
022     */
023    
024    package org.apache.commons.xmlio.out;
025    
/**
 * Collection of XML encoding/decoding helpers. <br>
 * This is all about the special characters &amp;, &lt; and &gt; (the latter
 * because the sequence <code>]]&gt;</code> is not allowed in character data),
 * and for attributes &quot; and &apos;. These must be encoded/decoded
 * from/to XML.
 *
 */
public final class XMLEncode {

    /** Length of the CDATA wrapper <code>&lt;![CDATA[</code> + <code>]]&gt;</code> (9 + 3 characters). */
    private final static int CDATA_BLOCK_THRESHOLD_LENGTH = 12;
    private final static char DEFAULT_QUOTE_CHAR = '"';

    /** Checks if this text purely consists of white space characters as
     * defined by {@link Character#isWhitespace(char)} (this includes ' ',
     * TAB and NEWLINE).
     *
     * @param text the text to check, must not be <code>null</code>
     * @return <code>true</code> if every character is white space; an empty
     * string yields <code>true</code>
     * @throws NullPointerException if <code>text</code> is <code>null</code>
     */
    public final static boolean isWhiteSpace(String text) {
        for (int i = 0; i < text.length(); i++) {
            if (!Character.isWhitespace(text.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /** Makes any text fit into XML attributes.
     *
     * @param text the raw attribute value
     * @param quoteChar the character used to surround the encoded value
     * @return the encoded value including the surrounding quote characters,
     * or <code>null</code> if <code>text</code> is <code>null</code>
     */
    public final static String xmlEncodeTextForAttribute(String text, char quoteChar) {
        if (text == null) {
            return null;
        }
        return xmlEncodeTextAsPCDATA(text, true, quoteChar);
    }

    /** Encodes text as XML in the most suitable way, either CDATA block or PCDATA.
     *
     * @param text the raw text
     * @return <code>text</code> itself if it needs no encoding, otherwise a
     * CDATA block (if the text is long enough and compatible) or the PCDATA
     * encoded text; <code>null</code> if <code>text</code> is <code>null</code>
     */
    public final static String xmlEncodeText(String text) {
        if (text == null) {
            return null;
        }
        if (!needsEncoding(text)) {
            return text;
        }
        // only encode as CDATA if the text is longer than the CDATA block overhead
        if (text.length() > CDATA_BLOCK_THRESHOLD_LENGTH) {
            String cdata = xmlEncodeTextAsCDATABlock(text);
            if (cdata != null) {
                return cdata;
            }
        }
        // if everything else fails, do it the safe way...
        return xmlEncodeTextAsPCDATA(text);
    }

    /** Encodes any text as PCDATA.
     *
     * @param text the raw text
     * @return the encoded text or <code>null</code> if <code>text</code> is <code>null</code>
     */
    public final static String xmlEncodeTextAsPCDATA(String text) {
        if (text == null) {
            return null;
        }
        return xmlEncodeTextAsPCDATA(text, false);
    }

    /** Encodes any text as PCDATA. 
     * @param text the raw text
     * @param forAttribute if you want
     * quotes and apostrophes specially treated for attributes
     * @return the encoded text (surrounded by the default quote character
     * <code>"</code> if <code>forAttribute</code> is set) or <code>null</code>
     * if <code>text</code> is <code>null</code>
     */
    public final static String xmlEncodeTextAsPCDATA(String text, boolean forAttribute) {
        return xmlEncodeTextAsPCDATA(text, forAttribute, DEFAULT_QUOTE_CHAR);
    }

    /** Encodes any text as PCDATA. 
     * @param text the raw text
     * @param forAttribute if you want
     * quotes and apostrophes specially treated for attributes
     * @param quoteChar if this is for attributes this <code>char</code> is used to quote the attribute value
     * @return the encoded text, surrounded by <code>quoteChar</code> if
     * <code>forAttribute</code> is set, or <code>null</code> if
     * <code>text</code> is <code>null</code>
     */
    public final static String xmlEncodeTextAsPCDATA(String text, boolean forAttribute, char quoteChar) {
        if (text == null) {
            return null;
        }
        StringBuffer n = new StringBuffer(text.length() * 2 + 2);

        // opening quote goes in first, so the buffer never has to be shifted
        if (forAttribute) {
            n.append(quoteChar);
        }

        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '&' :
                    n.append("&amp;");
                    break;
                case '<' :
                    n.append("&lt;");
                    break;
                case '>' : // FIX for sourceforge bug #802520 ("]]>" needs encoding)
                    n.append("&gt;");
                    break;
                case '"' :
                    if (forAttribute) {
                        n.append("&quot;");
                    } else {
                        n.append(c);
                    }
                    break;
                case '\'' :
                    if (forAttribute) {
                        n.append("&apos;");
                    } else {
                        n.append(c);
                    }
                    break;
                default :
                    n.append(c);
                    break;
            }
        }

        if (forAttribute) {
            n.append(quoteChar);
        }

        return n.toString();
    }

    /** Returns string as CDATA block if possible, otherwise null.
     *
     * @param text the raw text
     * @return the text wrapped into a CDATA block, or <code>null</code> if
     * <code>text</code> is <code>null</code> or contains <code>]]&gt;</code>
     */
    public final static String xmlEncodeTextAsCDATABlock(String text) {
        if (text == null) {
            return null;
        }
        if (isCompatibleWithCDATABlock(text)) {
            return "<![CDATA[" + text + "]]>";
        } else {
            return null;
        }
    }

    /** Checks if this text needs encoding in order to be represented in XML.
     *
     * @param text the raw text
     * @return <code>true</code> if the text contains &amp;, &lt; or the
     * sequence <code>]]&gt;</code>; <code>false</code> otherwise or if
     * <code>text</code> is <code>null</code>
     */
    public final static boolean needsEncoding(String text) {
        return needsEncoding(text, false);
    }

    /** Checks if this text needs encoding in order to be represented in XML.
     * 
     * Set <code>checkForAttr</code> if you want to check for storability in 
     * an attribute. 
     *
     * @param data the raw text
     * @param checkForAttr if set, quotes and apostrophes count as
     * characters that need encoding as well
     * @return <code>true</code> if the text contains &amp;, &lt;, the
     * sequence <code>]]&gt;</code> or (only with <code>checkForAttr</code>)
     * a quote or an apostrophe; <code>false</code> otherwise or if
     * <code>data</code> is <code>null</code>
     */
    public final static boolean needsEncoding(String data, boolean checkForAttr) {
        if (data == null) {
            return false;
        }
        for (int i = 0; i < data.length(); i++) {
            char c = data.charAt(i);
            switch (c) {
                case '&' :
                case '<' :
                    return true;
                case '"' :
                case '\'' :
                    if (checkForAttr) {
                        return true;
                    }
                    break;
                case '>' :
                    // A lone '>' is fine, but the sequence "]]>" must not
                    // appear literally in character data. Flagging it for
                    // attributes as well is conservative but harmless, as
                    // the encoder escapes '>' in both modes.
                    if (i >= 2 && data.charAt(i - 1) == ']' && data.charAt(i - 2) == ']') {
                        return true;
                    }
                    break;
                default :
                    break;
            }
        }
        return false;
    }

    /** Can this text be stored into a CDATA block?
     *
     * @param text the raw text
     * @return <code>true</code> if <code>text</code> is not <code>null</code>
     * and does not contain the CDATA end marker <code>]]&gt;</code>
     */
    public final static boolean isCompatibleWithCDATABlock(String text) {
        if (text == null) {
            return false;
        }
        return (text.indexOf("]]>") == -1);
    }

    /** Make CDATA out of possibly encoded PCDATA. <br>
     * E.g. make '&amp;' out of '&amp;amp;'
     * <br>
     * Only the five predefined entities (amp, lt, gt, quot, apos) are
     * decoded; any other ampersand is copied unchanged. The input is decoded
     * in a single pass, so the result of one replacement is never decoded
     * again.
     *
     * @param pcdata the possibly encoded text
     * @return the decoded text or <code>null</code> if <code>pcdata</code> is <code>null</code>
     */
    public final static String xmlDecodeTextToCDATA(String pcdata) {
        if (pcdata == null) {
            return null;
        }
        char c, c1, c2, c3, c4, c5;
        StringBuffer n = new StringBuffer(pcdata.length());
        for (int i = 0; i < pcdata.length(); i++) {
            c = pcdata.charAt(i);
            if (c == '&') {
                c1 = lookAhead(1, i, pcdata);
                c2 = lookAhead(2, i, pcdata);
                c3 = lookAhead(3, i, pcdata);
                c4 = lookAhead(4, i, pcdata);
                c5 = lookAhead(5, i, pcdata);

                // on a match, skip the remaining characters of the entity
                if (c1 == 'a' && c2 == 'm' && c3 == 'p' && c4 == ';') {
                    n.append("&");
                    i += 4;
                } else if (c1 == 'l' && c2 == 't' && c3 == ';') {
                    n.append("<");
                    i += 3;
                } else if (c1 == 'g' && c2 == 't' && c3 == ';') {
                    n.append(">");
                    i += 3;
                } else if (c1 == 'q' && c2 == 'u' && c3 == 'o' && c4 == 't' && c5 == ';') {
                    n.append("\"");
                    i += 5;
                } else if (c1 == 'a' && c2 == 'p' && c3 == 'o' && c4 == 's' && c5 == ';') {
                    n.append("'");
                    i += 5;
                } else {
                    n.append("&");
                }
            } else {
                n.append(c);
            }
        }
        return n.toString();
    }

    /** Returns the character <code>la</code> positions after
     * <code>offset</code> in <code>data</code>, or <code>0x0</code> if that
     * position lies outside of the string.
     */
    private final static char lookAhead(int la, int offset, String data) {
        int index = offset + la;
        // explicit bounds check instead of catching an exception
        if (index < 0 || index >= data.length()) {
            return 0x0;
        }
        return data.charAt(index);
    }

    /** Checks if <code>text</code> contains at least one of the characters
     * in <code>chars</code>; combines multiple checks in one method for
     * speed. Currently not referenced from within this class.
     */
    private final static boolean contains(String text, char[] chars) {
        if (text == null || chars == null || chars.length == 0) {
            return false;
        }
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            for (int j = 0; j < chars.length; j++) {
                if (chars[j] == c) {
                    return true;
                }
            }
        }
        return false;
    }

}