001    /*
002     * Copyright 1999,2004 The Apache Software Foundation.
003     * 
004     * Licensed under the Apache License, Version 2.0 (the "License");
005     * you may not use this file except in compliance with the License.
006     * You may obtain a copy of the License at
007     * 
008     *      http://www.apache.org/licenses/LICENSE-2.0
009     * 
010     * Unless required by applicable law or agreed to in writing, software
011     * distributed under the License is distributed on an "AS IS" BASIS,
012     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013     * See the License for the specific language governing permissions and
014     * limitations under the License.
015     */
016    
017    package org.apache.commons.feedparser.sax;
018    
019    import java.util.HashMap;
020    import java.util.HashSet;
021    
022    import org.apache.commons.feedparser.FeedParserException;
023    import org.apache.commons.feedparser.FeedParserListener;
024    import org.apache.commons.feedparser.FeedParserState;
025    import org.apache.commons.feedparser.FeedVersion;
026    import org.xml.sax.Attributes;
027    import org.xml.sax.SAXException;
028    import org.xml.sax.helpers.DefaultHandler;
029    
030    /** *
031     * @author <a href="mailto:burton@apache.org">Kevin A. Burton (burtonator)</a>
032     * @version $Id: RSSFeedParser.java 373622 2006-01-30 22:53:00Z mvdb $
033     */
034    public class RSSFeedParser extends BaseDefaultHandler {
035    
036        public FeedParserListener listener = null;
037    
038        boolean onItem = false;
039    
040        HashMap properties = new HashMap();
041    
042        FeedParserState state = new FeedParserState();
043    
044        static HashSet RSS_NAMESPACES = new HashSet();
045    
046        static HashSet RDF_NAMESPACES = new HashSet();
047    
048        static HashSet MOD_CONTENT_NAMESPACES = new HashSet();
049    
050        static {
051    
052            RSS_NAMESPACES.add( "http://purl.org/rss/1.0/" );
053    
054            RDF_NAMESPACES.add( "http://www.w3.org/1999/02/22-rdf-syntax-ns#" );
055    
056            MOD_CONTENT_NAMESPACES.add( "http://purl.org/rss/1.0/modules/content/" );
057            
058        }
059        
060        /**
061         * 
062         * Create a new <code>RSSFeedParser</code> instance.
063         *
064         * 
065         */
066        public RSSFeedParser() {
067    
068            super( "FIXME" );
069            
070            this.parser = this;
071    
072            setNext( new ChannelTemplate( this ) );
073    
074        }
075    
076        public void startDocument() throws SAXException {
077    
078            try { 
079                
080                FeedVersion v = new FeedVersion();
081                v.isRSS = true;
082                listener.onFeedVersion( v );
083                
084                listener.init();
085                
086            } catch ( FeedParserException f ) {
087                throw new SAXException( f );
088            }
089    
090        }
091    
092        public void endDocument() throws SAXException {
093    
094            try { 
095                
096                listener.finished();
097                
098            } catch ( FeedParserException f ) {
099                throw new SAXException( f );
100            }
101    
102        }
103    
104        /**
105         * Match rss:channel
106         */
107        class ChannelTemplate extends BaseDefaultHandler {
108    
109            public ChannelTemplate( RSSFeedParser parser ) {
110    
111                super( "channel", parser.RSS_NAMESPACES, parser );
112    
113                setNext( new URLTemplate( parser ) );
114    
115            }
116    
117            public void beginFeedElement() throws FeedParserException {
118    
119                parser.listener.onChannel( parser.state,
120                                           getProperty( "title" ),
121                                           getProperty( "link" ),
122                                           getProperty( "description" ) );
123    
124            }
125        
126            public void endFeedElement() throws FeedParserException {
127                parser.listener.onChannelEnd();
128            }
129    
130        }
131    
132        /**
133         * Match rss:url for images/etc
134         */
135        class URLTemplate extends BaseDefaultHandler {
136    
137            public URLTemplate( RSSFeedParser parser ) {
138    
139                super( "url", parser.RSS_NAMESPACES, parser );
140    
141                setNext( new ModContentTemplate( parser ) );
142                //this.setNext( new RDFValueTemplate( parser ) );
143    
144            }
145    
146        }
147    
148        /**
149         * Match the rdf:value for mod_content
150         *
151         * 
152         */
153        class ModContentTemplate extends BaseDefaultHandler {
154    
155            public ModContentTemplate( RSSFeedParser parser ) {
156    
157                super( "items", parser.MOD_CONTENT_NAMESPACES, parser );
158    
159                this.setNext( new RDFValueTemplate( parser ) );
160    
161            }
162    
163        }
164        
165        /**
166         * Match the rdf:value for mod_content
167         *
168         * 
169         */
170        class RDFValueTemplate extends BaseDefaultHandler {
171    
172            public RDFValueTemplate( RSSFeedParser parser ) {
173    
174                super( "value", parser.RDF_NAMESPACES, parser );
175    
176                this.setIncludeContent( true );
177                this.setNext( new RSSImageFeedParser( parser ) );
178    
179            }
180    
181            public void endFeedElement() throws FeedParserException {
182                //System.out.println( " FIXME: (debug): " + getProperty( "value" ) );
183            }
184    
185        }
186        
187    }
188    
189    class RSSImageFeedParser extends BaseDefaultHandler {
190    
191        public RSSImageFeedParser( RSSFeedParser parser ) {
192    
193            super( "image", parser.RSS_NAMESPACES, parser );
194    
195            setNext( new RSSItemFeedParser( parser ) );
196    
197        }
198    
199        public void beginFeedElement() throws FeedParserException {
200    
201            parser.listener.onImage( parser.state,
202                                     getProperty( "title" ),
203                                     getProperty( "link" ),
204                                     getProperty( "url" ) );
205    
206        }
207        
208        public void endFeedElement() throws FeedParserException {
209            parser.listener.onImageEnd();
210        }
211    
212    }
213    
214    class RSSItemFeedParser extends BaseDefaultHandler {
215    
216        public RSSItemFeedParser( RSSFeedParser parser ) {
217    
218            super( "item", parser );
219            this.namespaces = parser.RSS_NAMESPACES;
220    
221            setNext( new RSSTitleFeedParser( parser ) );
222    
223        }
224    
225        public void beginFeedElement() throws FeedParserException {
226    
227            parser.listener.onItem( parser.state,
228                                    getProperty( "title" ),
229                                    getProperty( "link" ),
230                                    getProperty( "description" ),
231                                    null );
232    
233        }
234    
235        public void endFeedElement() throws FeedParserException {
236            parser.listener.onItemEnd();
237        }
238        
239    }
240    
241    class RSSTitleFeedParser extends BaseDefaultHandler {
242        
243        public RSSTitleFeedParser( RSSFeedParser parser ) {
244    
245            super( "title", parser );
246    
247            setNext( new RSSLinkFeedParser( parser ) );
248    
249        }
250    
251    }
252    
253    class RSSLinkFeedParser extends BaseDefaultHandler {
254    
255        public RSSLinkFeedParser( RSSFeedParser parser ) {
256            super( "link", parser );
257    
258            setNext( new RSSDescriptionFeedParser( parser ) );
259        }
260    
261    }
262    
263    class RSSDescriptionFeedParser extends BaseDefaultHandler {
264    
265        public RSSDescriptionFeedParser( RSSFeedParser parser ) {
266            super( "description", parser );
267        }
268    
269    }
270    
271    /**
272     * dc:subject support
273     */
274    class RSSDcSubjectFeedParser extends BaseDefaultHandler {
275    
276        //MetaFeedParserListener metadataListener= null;
277        
278        public RSSDcSubjectFeedParser( RSSFeedParser parser ) {
279            super( "subject", parser );
280        }
281    
282        public void beginFeedElement() {
283    
284            //only if it's dc:subject
285            //listener.onSubject( parser.state, parser.getProperty( "subject" ) );
286    
287        }
288    
289        public void endFeedElement() {
290    
291        }
292    
293    }
294    
295    class BaseDefaultHandler extends DefaultHandler {
296    
297        public static int STRING_BUFFER_CAPACITY = 100000;
298        
299        //BUG: this will break on nested code:
300    
301        //     <foo>
302        //         <foo>
303        //
304        //         </foo>
305        //
306        //     </foo>
307    
308        // won't be smart enough to realize it's nested
309        
310        /**
311         * The local name of the element
312         */
313        private String local = null;
314    
315        //FIXME: move to a FastStringBuffer that's not synchronized.
316        private StringBuffer buff = null;
317    
318        private boolean onElement = false;
319    
320        private boolean includeContent = false;
321        
322        BaseDefaultHandler next = null;
323    
324        FeedParserListener listener = null;
325    
326        RSSFeedParser parser = null;
327    
328        static HashMap nsPrefixMapping = new HashMap();
329        
330        /**
331         * Store a hashset of namespaces that the given URL supports.
332         *
333         */
334        HashSet namespaces = null;
335    
336        public BaseDefaultHandler( String local ) {
337            this.local = local;
338        }
339    
340        public BaseDefaultHandler( String local, RSSFeedParser parser ) {
341    
342            this.local = local;
343            this.parser = parser;
344            
345        }
346    
347        public BaseDefaultHandler( String local,
348                                   HashSet namespaces,
349                                   RSSFeedParser parser ) {
350    
351            this.local = local;
352            this.namespaces = namespaces;
353            this.parser = parser;
354            
355        }
356    
357        /**
358         * If true we include the RAW XML content from the parser.
359         *
360         * 
361         */
362        public void setIncludeContent( boolean includeContent ) {
363            this.includeContent = includeContent;
364        }
365        
366        /**
367         * Set the next template to process in this chain.
368         *
369         * 
370         */
371        public void setNext( BaseDefaultHandler next ) {
372            this.next = next;
373        }
374        
375        /**
376         * Return the value of character data forfor the element.
377         *
378         * 
379         */
380        public String toString() {
381    
382            if ( buff == null )
383                return null;
384            
385            if ( buff.length() == 0 )
386                return null;
387    
388            return buff.toString();
389        }
390    
391        /**
392         * Return true if the namespace is valid and this class is handling the
393         * given element name
394         *
395         * 
396         */
397        boolean isLocal( String namespace, String local ) {
398    
399            //wee if we need to test forfor namespaces
400            if ( namespace != null && namespaces != null && ! namespaces.contains( namespace ) )
401                return false;
402    
403            return this.local.equals( local );
404        }
405    
406        /**
407         * Get the value of a string property we found whilewhile parsing
408         *
409         * 
410         */
411        public String getProperty( String name ) {
412            return (String)parser.properties.get( name );
413        }
414    
415        public boolean getBoolean( String name ) {
416    
417            return "true".equals( getProperty( name ) );
418            
419        }
420    
421        /**
422         * Method to call when we're finished processing this element but BEFORE
423         * processing of the next element in the chain.
424         *
425         * 
426         */
427        public void beginFeedElement() throws FeedParserException {}
428    
429        /**
430         * Method to call when we're finished processing this element but AFTER
431         * processing of the next element in the chain.
432         *
433         * 
434         */
435        public void endFeedElement() throws FeedParserException {}
436    
437        private boolean includeContentPrefix( String namespace ) {
438    
439            if ( namespace != null ) {
440    
441                String prefix = (String)nsPrefixMapping.get( namespace );
442    
443                if ( prefix != null ) {
444    
445                    buff.append( prefix );
446                    buff.append( ":" );
447                    return true;
448                }
449    
450            }
451    
452            return false;
453            
454        }
455        
456        // **** SAX DefaultHandler **************************************************
457    
458        /**
459         * Keep track of namespaces.
460         *
461         * 
462         */
463        public void startPrefixMapping( String prefix,
464                                        String namespace ) throws SAXException {
465    
466            if ( prefix != null && ! "".equals( prefix ) ) {
467                //System.out.println( namespace + " -> " + prefix );
468            
469                nsPrefixMapping.put( namespace, prefix );
470    
471            } 
472    
473        }
474    
475        //FIXME: it might be possible to call an item again without a member and the
476        //value from the LAST item is used... this needs to be a fatal error and we
477        //need to clear ...
478    
479        public void startElement( String namespace,
480                                  String local,
481                                  String qName,
482                                  Attributes attributes ) throws SAXException {
483    
484            if ( isLocal( namespace, local ) ) {
485    
486                //FIXME: is there a more efficient way to clear a buffer than this?
487    
488                //FIXME: also only do this ifif it's necessary and content has
489                //actually been added.  This will save some performance.
490    
491                //buff = new StringBuffer( STRING_BUFFER_CAPACITY );
492    
493                //buff = new StringBuffer( 1000 );
494    
495                if ( buff == null ) {
496                    buff = new StringBuffer( 1000 );
497                } else {
498                    buff.setLength( 0 );
499                }
500    
501                onElement = true;
502            }
503    
504            if ( next != null )
505                next.startElement( namespace, local, qName, attributes );
506    
507            if ( includeContent && onElement ) {
508                buff.append( "<" );
509    
510                boolean hasPrefix = includeContentPrefix( namespace );
511                
512                buff.append( local );
513    
514                if ( ! hasPrefix && namespace != null ) {
515                    buff.append( " xmlns=\"" );
516                    buff.append( namespace );
517                    buff.append( "\"" );
518                }
519    
520                //now include attributes
521    
522                int length = attributes.getLength();
523    
524                for ( int i = 0; i < length; ++i ) {
525    
526                    buff.append( " " );
527                    buff.append( attributes.getQName( i ) );
528                    buff.append( "=" );
529                    buff.append( "\"" );
530                    buff.append( attributes.getValue( i ) );
531                    buff.append( "\"" );
532    
533                }
534                
535                buff.append( ">" );
536            }
537            
538        }
539    
540        public void characters( char[] ch,
541                                int start,
542                                int length ) throws SAXException {
543     
544            if ( onElement ) {
545                buff.append( ch, start, length );
546            }
547    
548            if ( next != null )
549                next.characters( ch, start, length );
550    
551        }
552        
553        public void endElement( String namespace,
554                                String local,
555                                String qName ) throws SAXException {
556    
557            try { 
558    
559                if ( isLocal( namespace, local ) ) {
560    
561                    onElement = false;
562                    parser.properties.put( local, toString() );
563    
564                    beginFeedElement();
565                
566                }
567    
568                if ( next != null )
569                    next.endElement( namespace, local, qName );
570    
571                if ( isLocal( namespace, local ) )
572                    endFeedElement();
573    
574                if ( includeContent && onElement ) {
575                    buff.append( "</" );
576    
577                    includeContentPrefix( namespace );
578    
579                    buff.append( local );
580    
581                    buff.append( ">" );
582                }
583    
584            } catch ( FeedParserException fpe ) {
585    
586                throw new SAXException( fpe );
587    
588            }
589    
590        }
591    
592    }
593