001    /*
002     * Copyright 1999,2004 The Apache Software Foundation.
003     * 
004     * Licensed under the Apache License, Version 2.0 (the "License");
005     * you may not use this file except in compliance with the License.
006     * You may obtain a copy of the License at
007     * 
008     *      http://www.apache.org/licenses/LICENSE-2.0
009     * 
010     * Unless required by applicable law or agreed to in writing, software
011     * distributed under the License is distributed on an "AS IS" BASIS,
012     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013     * See the License for the specific language governing permissions and
014     * limitations under the License.
015     */
016    
017    package org.apache.commons.feedparser;
018    
019    import java.util.Iterator;
020    import java.util.List;
021    
022    import org.jaxen.jdom.JDOMXPath;
023    import org.jdom.Attribute;
024    import org.jdom.Element;
025    
026    /**
027     * Handles parsing RSS .
028     *
029     * @author <a href="mailto:burton@apache.org">Kevin A. Burton (burtonator)</a>
030     * @version $Id: RSSFeedParser.java 373614 2006-01-30 22:31:21Z mvdb $
031     */
032    public class RSSFeedParser extends BaseParser {
033    
034        /**
035         * Parse the given document as an OPML document.
036         *
037         * 
038         */
039        public static void parse( FeedParserListener listener,
040                                  org.jdom.Document doc ) throws Exception {
041    
042            FeedParserState state = new FeedParserState( listener );
043    
044            FeedVersion v = new FeedVersion();
045    
046            v.isRSS = true;
047            v.version = doc.getRootElement().getAttributeValue( "version" );
048            
049            listener.onFeedVersion( v );
050    
051            listener.init();
052    
053            //*** now process the channel. ***
054            JDOMXPath xpath = new JDOMXPath( "/descendant::*[local-name() = 'channel']" );
055            Element channel = (Element)xpath.selectSingleNode( doc );
056            state.current = channel;
057    
058            doLocale( state, listener, channel );
059            doChannel( listener, state );
060            doLocaleEnd( state, listener, channel );
061    
062            //*** now process the image. ***
063            xpath = new JDOMXPath( "/descendant::*[local-name() = 'image']" );
064            List images = xpath.selectNodes( doc );
065            Iterator i = images.iterator();
066            //update items.
067            while ( i.hasNext() ) {
068    
069                Element child = (Element)i.next();
070                state.current = child;
071                doParseImage( listener, state );
072    
073            }
074    
075            //*** now process all items. ***
076            xpath = new JDOMXPath( "/descendant::*[local-name() = 'item']" );
077    
078            List items = xpath.selectNodes( doc );
079    
080            i = items.iterator();
081                
082            //update items.
083            while ( i.hasNext() ) {
084    
085                Element item = (Element)i.next();
086    
087                state.current = item;
088                    
089                doLocale( state, listener, item );
090                doItem( listener, state );
091                doLocaleEnd( state, listener, item );
092    
093            }
094    
095            listener.finished();
096    
097        }
098        
099        /**
100         * Parse the given channel 
101         *
102         * 
103         */
104        private static void doChannel( FeedParserListener listener,
105                                       FeedParserState state ) throws Exception {
106    
107            String link = getChildElementTextByName( state, "link" );
108    
109            if ( link != null )
110                link = link.trim();
111            
112            listener.onChannel( state, 
113                                getChildElementTextByName( state, "title" ),
114                                link,
115                                getChildElementTextByName( state, "description" ) );
116    
117            listener.onChannelEnd();
118    
119        }
120    
121        /**
122         * Parse the given channel 
123         *
124         * 
125         */
126        private static void doParseImage( FeedParserListener listener,
127                                          FeedParserState state ) throws Exception {
128    
129            String title = getChildElementTextByName( state, "title" );
130            String link = getChildElementTextByName( state, "link" );
131            String url = getChildElementTextByName( state, "url" );
132    
133            if ( url != null ) {
134                listener.onImage( state, title, link, url );
135                listener.onImageEnd();
136    
137            } 
138    
139        }
140    
141        /**
142         * 
143         *
144         * 
145         */
146        private static void doItem( FeedParserListener listener,
147                                    FeedParserState state ) throws Exception {
148    
149            String resource = null;
150    
151            //FIXME: migrate this to XPath
152    
153            JDOMXPath xpath = new JDOMXPath( "@rdf:resource|guid|descendant::*[local-name() = 'link']" );
154            xpath.addNamespace( NS.RDF.getPrefix(), NS.RDF.getURI() );
155            Object node = xpath.selectSingleNode( state.current );
156    
157            //FIXME: if this is a GUID and isPermalink=false don't use it as the
158            //permalink.
159    
160            if ( node instanceof Element ) {
161    
162                Element element = (Element)node;
163                resource = element.getText();
164    
165                if ( "guid".equals( element.getName() ) ) {
166    
167                    boolean isPermaLink =
168                        "true".equals( element.getAttributeValue( "isPermaLink" ) );
169    
170                    if ( isPermaLink == false ) {
171                        //resort to the 'link'
172    
173                        Element link = state.current.getChild( "link" );
174    
175                        if ( link != null ) {
176                            resource = link.getText();
177                        }
178                        
179                    }
180                    
181                }
182    
183            } else if ( node instanceof Attribute ) {
184                resource = ((Attribute)node).getValue();
185            }
186    
187            if ( resource == null )
188                return;
189    
190            //title, link, description
191    
192            listener.onItem( state,
193                             getChildElementTextByName( state, "title" ),
194                             getChildElementTextByName( state, "link" ),
195                             getChildElementTextByName( state, "description" ),
196                             resource );
197    
198            //see if we have content encoded and if we need to report these events.
199    
200            if ( listener instanceof ModContentFeedParserListener ) {
201    
202                ModContentFeedParserListener mcpl = (ModContentFeedParserListener)listener;
203    
204                Element encoded = state.current.getChild( "encoded", NS.CONTENT );
205    
206                if ( encoded != null ) {
207    
208                    //FIXME: move to the onContent API defined within the
209                    //AtomFeedParser and deprecated this body handling.
210    
211                    mcpl.onContentEncoded( new FeedParserState( encoded ),
212                                           encoded.getText() );
213    
214                    mcpl.onContentEncodedEnd();
215    
216                } else {
217    
218                    Element items = state.current.getChild( "items", NS.CONTENT );
219    
220                    if ( items != null ) {
221    
222                        //FIXME: with malformed XML this could throw an NPE. Luckly
223                        //this format is rare now.
224                        Element value =
225                            items.getChild( "Bag", NS.RDF )
226                                .getChild( "li", NS.RDF )
227                                    .getChild( "item", NS.CONTENT )
228                                        .getChild( "value", NS.RDF );
229    
230                        //FIXME: move to the onContent API defined within the
231                        //AtomFeedParser and deprecated this body handling.
232    
233                        mcpl.onContentItem( new FeedParserState( value ),
234                                            null,
235                                            null,
236                                            value );
237    
238                        mcpl.onContentItemEnd();
239    
240                    }
241                        
242                }
243    
244            } 
245    
246            //process xhtml:body
247    
248            if ( listener instanceof XHTMLFeedParserListener ) {
249    
250                XHTMLFeedParserListener xfp = (XHTMLFeedParserListener)listener;
251    
252                Element body = state.current.getChild( "body", NS.XHTML );
253    
254                //FIXME: move to the onContent API defined within the AtomFeedParser
255                //and deprecated this body handling.
256                
257                if ( body != null ) {
258                    xfp.onXHTMLBody( new FeedParserState( body ),
259                                     body );
260                    xfp.onXHTMLBodyEnd();
261                } 
262    
263            }
264    
265            MetaFeedParser.parse( listener, state );
266            TagFeedParser.parse( listener, state );
267    
268            doEnclosures( listener, state );
269            
270            listener.onItemEnd();
271            
272        }
273    
274        private static void doEnclosures( FeedParserListener listener,
275                                          FeedParserState state ) throws Exception {
276    
277            if ( listener instanceof LinkFeedParserListener == false )
278                return;
279    
280            Element element = state.current.getChild( "enclosure" );
281    
282            if ( element == null )
283                return;
284    
285            LinkFeedParserListener linkFeedParserListener = (LinkFeedParserListener)listener;
286    
287            String rel = null;
288            String type = element.getAttributeValue( "type" );
289            String href = element.getAttributeValue( "url" );
290            String title = null;
291            long length = 0;
292            if (element.getAttributeValue("length") != null)
293                    length = Integer.parseInt( element.getAttributeValue( "length" ) );
294    
295            linkFeedParserListener.onLink( state,
296                                           rel,
297                                           type,
298                                           href,
299                                           title,
300                                           length );
301            
302        }
303        
304    }