View Javadoc

1   /*
2    * Copyright 2002,2004 The Apache Software Foundation.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *      http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  package org.apache.commons.jelly.tags.html;
17  
18  import org.apache.commons.jelly.JellyTagException;
19  import org.apache.commons.jelly.XMLOutput;
20  import org.apache.commons.jelly.tags.xml.ParseTagSupport;
21  
22  import org.apache.commons.logging.Log;
23  import org.apache.commons.logging.LogFactory;
24  
25  import org.cyberneko.html.parsers.SAXParser;
26  
27  import org.dom4j.Document;
28  import org.dom4j.io.SAXReader;
29  
30  import org.xml.sax.SAXException;
31  
32  
33  /** A tag which parses some HTML and defines a variable with the parsed Document.
34    * The HTML can either be specified as its body or can be passed in via the
35    * html property which can be a Reader, InputStream, URL or String URI.
36    *
37    * @author <a href="mailto:jstrachan@apache.org">James Strachan</a>
38    * @version $Revision: 155420 $
39    */
40  public class ParseTag extends ParseTagSupport {
41  
42      /** The Log to which logging calls will be made; not referenced anywhere else in this class. */
43      private static final Log log = LogFactory.getLog(ParseTag.class);
44  
45      /** The HTML to parse, either a String URI, a Reader or InputStream; when null, doTag falls back to the tag's text and then its body. */
46      private Object html;
47      private String element = "match"; // passed to the parser's names/elems property in createSAXReader; changed via setElement
48      private String attribute = "no-change"; // passed to the parser's names/attrs property in createSAXReader; changed via setAttribute
49  
50      public ParseTag() {
51      }
52  
53      // Tag interface -- doTag() parses the HTML source and stores the resulting Document in the context under the name returned by getVar()
54      //-------------------------------------------------------------------------
55      public void doTag(XMLOutput output) throws JellyTagException {
56          if (getVar() == null) { // the parsed Document has to be stored under a variable name, so a missing var is rejected up front
57              throw new IllegalArgumentException("The var attribute cannot be null");
58          }
59          Document document = null;
60          if (html == null) { // no explicit source was set: prefer the text from getText(), otherwise parse the tag body
61              String text = getText(); // getText/parseText/parseBody/parse are not defined here -- presumably inherited from ParseTagSupport
62              if (text != null) {
63                  document = parseText(text);
64              }
65              else {
66                  document = parseBody(output);
67              }
68          }
69          else { // an explicit source was supplied through setHtml
70              document = parse(html);
71          }
72          context.setVariable(getVar(), document); // publish the parsed Document under the var name
73      }
74  
75      // Properties
76      //-------------------------------------------------------------------------
77      /** Sets the source of the HTML which is either a String URI, Reader or InputStream */
78      public void setHtml(Object html) {
79          this.html = html;
80      }
81  
82      /**
83       * Sets whether attributes should be converted to a different case.
84       * Possible values are "upper", "lower" or "no-change" (the default).
85       *
86       * @param attribute The processing mode of attributes
87       */
88      public void setAttribute(String attribute) {
89          this.attribute = attribute;
90      }
91  
92      /**
93       * Sets whether elements should be converted to a different case.
94       * Possible values are "upper", "lower" or "match" (the default).
95       *
96       * @param element The processing mode of elements
97       */
98      public void setElement(String element) {
99          this.element = element;
100     }
101 
102 
103     // Implementation methods
104     //-------------------------------------------------------------------------
105 
106     /**
107      * Factory method to create a new SAXReader that wraps a NekoHTML SAXParser configured with the current element and attribute name-case modes
108      */
109     protected SAXReader createSAXReader() throws SAXException {
110         // installs the NekoHTML parser
111         SAXParser parser = new SAXParser();
112         parser.setProperty(
113             "http://cyberneko.org/html/properties/names/elems",
114             element
115         );
116         parser.setProperty(
117             "http://cyberneko.org/html/properties/names/attrs",
118             attribute
119         );
120         return new SAXReader( parser );
121     }
122 }