001 /*
002 * Copyright 1999,2004 The Apache Software Foundation.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 * http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017 package org.apache.commons.feedparser;
018
019 import java.util.Iterator;
020 import java.util.List;
021
022 import org.jaxen.jdom.JDOMXPath;
023 import org.jdom.Attribute;
024 import org.jdom.Element;
025
026 /**
027 * Handles parsing RSS .
028 *
029 * @author <a href="mailto:burton@apache.org">Kevin A. Burton (burtonator)</a>
030 * @version $Id: RSSFeedParser.java 373614 2006-01-30 22:31:21Z mvdb $
031 */
032 public class RSSFeedParser extends BaseParser {
033
034 /**
035 * Parse the given document as an OPML document.
036 *
037 *
038 */
039 public static void parse( FeedParserListener listener,
040 org.jdom.Document doc ) throws Exception {
041
042 FeedParserState state = new FeedParserState( listener );
043
044 FeedVersion v = new FeedVersion();
045
046 v.isRSS = true;
047 v.version = doc.getRootElement().getAttributeValue( "version" );
048
049 listener.onFeedVersion( v );
050
051 listener.init();
052
053 //*** now process the channel. ***
054 JDOMXPath xpath = new JDOMXPath( "/descendant::*[local-name() = 'channel']" );
055 Element channel = (Element)xpath.selectSingleNode( doc );
056 state.current = channel;
057
058 doLocale( state, listener, channel );
059 doChannel( listener, state );
060 doLocaleEnd( state, listener, channel );
061
062 //*** now process the image. ***
063 xpath = new JDOMXPath( "/descendant::*[local-name() = 'image']" );
064 List images = xpath.selectNodes( doc );
065 Iterator i = images.iterator();
066 //update items.
067 while ( i.hasNext() ) {
068
069 Element child = (Element)i.next();
070 state.current = child;
071 doParseImage( listener, state );
072
073 }
074
075 //*** now process all items. ***
076 xpath = new JDOMXPath( "/descendant::*[local-name() = 'item']" );
077
078 List items = xpath.selectNodes( doc );
079
080 i = items.iterator();
081
082 //update items.
083 while ( i.hasNext() ) {
084
085 Element item = (Element)i.next();
086
087 state.current = item;
088
089 doLocale( state, listener, item );
090 doItem( listener, state );
091 doLocaleEnd( state, listener, item );
092
093 }
094
095 listener.finished();
096
097 }
098
099 /**
100 * Parse the given channel
101 *
102 *
103 */
104 private static void doChannel( FeedParserListener listener,
105 FeedParserState state ) throws Exception {
106
107 String link = getChildElementTextByName( state, "link" );
108
109 if ( link != null )
110 link = link.trim();
111
112 listener.onChannel( state,
113 getChildElementTextByName( state, "title" ),
114 link,
115 getChildElementTextByName( state, "description" ) );
116
117 listener.onChannelEnd();
118
119 }
120
121 /**
122 * Parse the given channel
123 *
124 *
125 */
126 private static void doParseImage( FeedParserListener listener,
127 FeedParserState state ) throws Exception {
128
129 String title = getChildElementTextByName( state, "title" );
130 String link = getChildElementTextByName( state, "link" );
131 String url = getChildElementTextByName( state, "url" );
132
133 if ( url != null ) {
134 listener.onImage( state, title, link, url );
135 listener.onImageEnd();
136
137 }
138
139 }
140
141 /**
142 *
143 *
144 *
145 */
146 private static void doItem( FeedParserListener listener,
147 FeedParserState state ) throws Exception {
148
149 String resource = null;
150
151 //FIXME: migrate this to XPath
152
153 JDOMXPath xpath = new JDOMXPath( "@rdf:resource|guid|descendant::*[local-name() = 'link']" );
154 xpath.addNamespace( NS.RDF.getPrefix(), NS.RDF.getURI() );
155 Object node = xpath.selectSingleNode( state.current );
156
157 //FIXME: if this is a GUID and isPermalink=false don't use it as the
158 //permalink.
159
160 if ( node instanceof Element ) {
161
162 Element element = (Element)node;
163 resource = element.getText();
164
165 if ( "guid".equals( element.getName() ) ) {
166
167 boolean isPermaLink =
168 "true".equals( element.getAttributeValue( "isPermaLink" ) );
169
170 if ( isPermaLink == false ) {
171 //resort to the 'link'
172
173 Element link = state.current.getChild( "link" );
174
175 if ( link != null ) {
176 resource = link.getText();
177 }
178
179 }
180
181 }
182
183 } else if ( node instanceof Attribute ) {
184 resource = ((Attribute)node).getValue();
185 }
186
187 if ( resource == null )
188 return;
189
190 //title, link, description
191
192 listener.onItem( state,
193 getChildElementTextByName( state, "title" ),
194 getChildElementTextByName( state, "link" ),
195 getChildElementTextByName( state, "description" ),
196 resource );
197
198 //see if we have content encoded and if we need to report these events.
199
200 if ( listener instanceof ModContentFeedParserListener ) {
201
202 ModContentFeedParserListener mcpl = (ModContentFeedParserListener)listener;
203
204 Element encoded = state.current.getChild( "encoded", NS.CONTENT );
205
206 if ( encoded != null ) {
207
208 //FIXME: move to the onContent API defined within the
209 //AtomFeedParser and deprecated this body handling.
210
211 mcpl.onContentEncoded( new FeedParserState( encoded ),
212 encoded.getText() );
213
214 mcpl.onContentEncodedEnd();
215
216 } else {
217
218 Element items = state.current.getChild( "items", NS.CONTENT );
219
220 if ( items != null ) {
221
222 //FIXME: with malformed XML this could throw an NPE. Luckly
223 //this format is rare now.
224 Element value =
225 items.getChild( "Bag", NS.RDF )
226 .getChild( "li", NS.RDF )
227 .getChild( "item", NS.CONTENT )
228 .getChild( "value", NS.RDF );
229
230 //FIXME: move to the onContent API defined within the
231 //AtomFeedParser and deprecated this body handling.
232
233 mcpl.onContentItem( new FeedParserState( value ),
234 null,
235 null,
236 value );
237
238 mcpl.onContentItemEnd();
239
240 }
241
242 }
243
244 }
245
246 //process xhtml:body
247
248 if ( listener instanceof XHTMLFeedParserListener ) {
249
250 XHTMLFeedParserListener xfp = (XHTMLFeedParserListener)listener;
251
252 Element body = state.current.getChild( "body", NS.XHTML );
253
254 //FIXME: move to the onContent API defined within the AtomFeedParser
255 //and deprecated this body handling.
256
257 if ( body != null ) {
258 xfp.onXHTMLBody( new FeedParserState( body ),
259 body );
260 xfp.onXHTMLBodyEnd();
261 }
262
263 }
264
265 MetaFeedParser.parse( listener, state );
266 TagFeedParser.parse( listener, state );
267
268 doEnclosures( listener, state );
269
270 listener.onItemEnd();
271
272 }
273
274 private static void doEnclosures( FeedParserListener listener,
275 FeedParserState state ) throws Exception {
276
277 if ( listener instanceof LinkFeedParserListener == false )
278 return;
279
280 Element element = state.current.getChild( "enclosure" );
281
282 if ( element == null )
283 return;
284
285 LinkFeedParserListener linkFeedParserListener = (LinkFeedParserListener)listener;
286
287 String rel = null;
288 String type = element.getAttributeValue( "type" );
289 String href = element.getAttributeValue( "url" );
290 String title = null;
291 long length = 0;
292 if (element.getAttributeValue("length") != null)
293 length = Integer.parseInt( element.getAttributeValue( "length" ) );
294
295 linkFeedParserListener.onLink( state,
296 rel,
297 type,
298 href,
299 title,
300 length );
301
302 }
303
304 }