1 package org.apache.commons.digester3.examples.api.documentmarkup; 2 3 /* 4 * Licensed to the Apache Software Foundation (ASF) under one or more 5 * contributor license agreements. See the NOTICE file distributed with 6 * this work for additional information regarding copyright ownership. 7 * The ASF licenses this file to You under the Apache License, Version 2.0 8 * (the "License"); you may not use this file except in compliance with 9 * the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, software 14 * distributed under the License is distributed on an "AS IS" BASIS, 15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 */ 19 20 import org.apache.commons.digester3.Digester; 21 import org.apache.commons.digester3.Rule; 22 23 import java.util.List; 24 import javax.xml.parsers.SAXParser; 25 import org.xml.sax.XMLReader; 26 import org.xml.sax.SAXException; 27 import org.xml.sax.Attributes; 28 29 /** 30 * This is a subclass of digester which supports rules which implement 31 * the TextSegmentHandler interface, causing the "textSegment" method 32 * on each matching rule (of the appropriate type) to be invoked when 33 * an element contains a segment of text followed by a child element. 34 * <p> 35 * See the readme file included with this example for more information. 36 */ 37 public class MarkupDigester 38 extends Digester 39 { 40 41 /** See equivalent constructor in Digester class. */ 42 public MarkupDigester() 43 { 44 } 45 46 /** See equivalent constructor in Digester class. */ 47 public MarkupDigester( SAXParser parser ) 48 { 49 super( parser ); 50 } 51 52 /** See equivalent constructor in Digester class. */ 53 public MarkupDigester( XMLReader reader ) 54 { 55 super( reader ); 56 } 57 58 //=================================================================== 59 60 /** 61 * The text found in the current element since the last child element. 62 */ 63 protected StringBuilder currTextSegment = new StringBuilder(); 64 65 /** 66 * Process notification of character data received from the body of 67 * an XML element. 68 * 69 * @param buffer The characters from the XML document 70 * @param start Starting offset into the buffer 71 * @param length Number of characters from the buffer 72 * 73 * @exception SAXException if a parsing error is to be reported 74 */ 75 @Override 76 public void characters( char buffer[], int start, int length ) 77 throws SAXException 78 { 79 super.characters( buffer, start, length ); 80 currTextSegment.append( buffer, start, length ); 81 } 82 83 /** 84 * Process notification of the start of an XML element being reached. 85 * 86 * @param namespaceURI The Namespace URI, or the empty string if the element 87 * has no Namespace URI or if Namespace processing is not being performed. 88 * @param localName The local name (without prefix), or the empty 89 * string if Namespace processing is not being performed. 90 * @param qName The qualified name (with prefix), or the empty 91 * string if qualified names are not available. 92 * @param list The attributes attached to the element. If there are 93 * no attributes, it shall be an empty Attributes object. 94 * @exception SAXException if a parsing error is to be reported 95 */ 96 @Override 97 public void startElement( String namespaceURI, String localName, String qName, Attributes list ) 98 throws SAXException 99 { 100 handleTextSegments(); 101 102 // Unlike bodyText, which accumulates despite intervening child 103 // elements, currTextSegment gets cleared here. This means that 104 // we don't need to save it on a stack either. 105 currTextSegment.setLength( 0 ); 106 107 super.startElement( namespaceURI, localName, qName, list ); 108 } 109 110 /** 111 * Process notification of the end of an XML element being reached. 112 * 113 * @param namespaceURI - The Namespace URI, or the empty string if the 114 * element has no Namespace URI or if Namespace processing is not 115 * being performed. 116 * @param localName - The local name (without prefix), or the empty 117 * string if Namespace processing is not being performed. 118 * @param qName - The qualified XML 1.0 name (with prefix), or the 119 * empty string if qualified names are not available. 120 * @exception SAXException if a parsing error is to be reported 121 */ 122 @Override 123 public void endElement( String namespaceURI, String localName, String qName ) 124 throws SAXException 125 { 126 handleTextSegments(); 127 currTextSegment.setLength( 0 ); 128 super.endElement( namespaceURI, localName, qName ); 129 } 130 131 /** 132 * Iterate over the list of rules most recently matched, and 133 * if any of them implement the TextSegmentHandler interface then 134 * invoke that rule's textSegment method passing the current 135 * segment of text from the xml element body. 136 */ 137 private void handleTextSegments() 138 throws SAXException 139 { 140 if ( currTextSegment.length() > 0 ) 141 { 142 String segment = currTextSegment.toString(); 143 List<Rule> parentMatches = getMatches().peek(); 144 int len = parentMatches.size(); 145 for ( int i = 0; i < len; ++i ) 146 { 147 Rule r = parentMatches.get( i ); 148 if ( r instanceof TextSegmentHandler ) 149 { 150 TextSegmentHandler h = (TextSegmentHandler) r; 151 try 152 { 153 h.textSegment( segment ); 154 } 155 catch ( Exception e ) 156 { 157 throw createSAXException( e ); 158 } 159 } 160 } 161 } 162 } 163 164 }