001 /*
002 * Copyright 1999,2004 The Apache Software Foundation.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 * http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017 package org.apache.commons.feedparser;
018
019 import java.util.Iterator;
020 import java.util.List;
021
022 import org.apache.commons.feedparser.locate.EntityDecoder;
023 import org.jaxen.jdom.JDOMXPath;
024 import org.jdom.Attribute;
025 import org.jdom.CDATA;
026 import org.jdom.Comment;
027 import org.jdom.Element;
028 import org.jdom.Text;
029 import org.jdom.output.XMLOutputter;
030
031 /**
032 * http://www.intertwingly.net/wiki/pie/FrontPage
033 *
034 * http://www.ietf.org/internet-drafts/draft-ietf-atompub-format-05.txt
035 *
036 * http://www.ietf.org/internet-drafts/draft-ietf-atompub-format-04.txt
037 *
038 * http://www.mnot.net/drafts/draft-nottingham-atom-format-02.html
039 *
040 * http://www.ietf.org/html.charters/atompub-charter.html
041 *
042 * http://www.ietf.org/internet-drafts/draft-ietf-atompub-format-01.txt
043 *
044 * @author <a href="mailto:burton@apache.org">Kevin A. Burton (burtonator)</a>
045 * @version $Id: AtomFeedParser.java 373614 2006-01-30 22:31:21Z mvdb $
046 */
047 public class AtomFeedParser extends BaseParser {
048
049 /**
050 * Parse this feed.
051 *
052 *
053 */
054 public static void parse( FeedParserListener listener,
055 org.jdom.Document doc ) throws Exception {
056
057 FeedParserState state = new FeedParserState( listener );
058
059 FeedVersion v = new FeedVersion();
060 v.isAtom = true;
061 listener.onFeedVersion( v );
062
063 listener.init();
064
065 Element root = doc.getRootElement();
066
067 doLocale( state, listener, root );
068
069 doChannel( state, listener, doc );
070 doEntry( state, listener, doc );
071
072 doLocaleEnd( state, listener, root );
073
074 listener.finished();
075
076 }
077
078 private static void doChannel( FeedParserState state,
079 FeedParserListener listener,
080 org.jdom.Document doc ) throws Exception {
081
082 Element root = doc.getRootElement();
083
084 //perform onChannel method... (title, link, description)
085 String title = selectText( "/atom:feed/atom:title", root );
086
087 //xpath = new XPath( "/atom:feed/atom:link[atom:rel='alternate']" );
088
089 //perform onChannel method... (title, link, description)
090 String link = selectSingleAttribute( "/atom:feed/atom:link[@rel='alternate'][@type='text/html']/@href", root );
091
092 //String description = selectText( "/atom:feed/atom:summary[@rel='text/plain']", doc );
093
094 String tagline = selectText( "/atom:feed/atom:tagline", root );
095
096 //state.current = title;
097 listener.onChannel( state, title, link, tagline );
098
099 listener.onChannelEnd();
100
101 }
102
103 private static void doEntry( FeedParserState state,
104 FeedParserListener listener,
105 org.jdom.Document doc ) throws Exception {
106
107 JDOMXPath xpath = new JDOMXPath( "/atom:feed/atom:entry" );
108 xpath.setNamespaceContext( NS.context );
109
110 List items = xpath.selectNodes( doc );
111
112 Iterator i = items.iterator();
113
114 //update items.
115 while ( i.hasNext() ) {
116
117 Element child = (Element)i.next();
118
119 doLocale( state, listener, child );
120
121 String title = selectText( "atom:title", child );
122
123 // The "atom:link" element is a Link construct that conveys a URI
124 // associated with the entry. The nature of the relationship as well
125 // as the link itself is determined by the element's content.
126
127 // atom:entry elements MUST contain at least one atom:link element
128 // with a rel attribute value of "alternate".
129
130 // atom:entry elements MUST NOT contain more than one atom:link
131 // element with a rel attribute value of "alternate" that has the
132 // same type attribute value.
133
134 // atom:entry elements MAY contain additional atom:link elements
135 // beyond those described above.
136
137 String link = selectSingleAttribute( "atom:link[@rel='alternate'][@type='text/html']/@href",
138 child );
139
140 // The "atom:summary" element is a Content construct that conveys a
141 // short summary, abstract or excerpt of the entry. atom:entry
142 // elements MAY contain an atom:created element, but MUST NOT
143 // contain more than one.
144
145 //FIXME: what if there is no type attribute specified? Whats the default?
146
147 // Content constructs MAY have a "type" attribute, whose value
148 // indicates the media type of the content. When present, this
149 // attribute's value MUST be a media type [RFC2045]. If this
150 // attribute is not present, processors MUST behave as if it were
151 // present with a value of "text/ plain".
152
153 String description = null;
154
155 Element summary = child.getChild( "summary", NS.ATOM );
156
157 if ( summary != null ) {
158
159 String type = summary.getAttributeValue( "type", NS.ATOM );
160
161 if ( type == null || "text/plain".equals( type ) )
162 description = summary.getText();
163
164 }
165
166 state.current = child;
167
168 listener.onItem( state, title, link, description, link );
169
170 doLink( state, listener, child );
171
172 doMeta( state, listener, child );
173
174 doContent( state, listener, child );
175
176 MetaFeedParser.parse( listener, state );
177 TagFeedParser.parse( listener, state );
178
179 listener.onItemEnd();
180 doLocale( state, listener, child );
181
182 }
183
184 }
185
186 private static void doLink( FeedParserState state,
187 FeedParserListener listener,
188 Element current ) throws Exception {
189
190 if ( listener instanceof LinkFeedParserListener == false )
191 return;
192
193 LinkFeedParserListener lfpl = (LinkFeedParserListener)listener;
194
195 JDOMXPath xpath = new JDOMXPath( "atom:link" );
196 xpath.setNamespaceContext( NS.context );
197
198 List items = xpath.selectNodes( current );
199
200 Iterator it = items.iterator();
201
202 //update items.
203 while ( it.hasNext() ) {
204
205 Element link = (Element)it.next();
206
207 String href = link.getAttributeValue( "href" );
208 String rel = link.getAttributeValue( "rel" );
209 String type = link.getAttributeValue( "type" );
210
211 String title = null;
212 long length = -1;
213
214 lfpl.onLink( state, rel, type, href, title, length );
215
216 }
217
218 }
219
220 private static void doContent( FeedParserState state,
221 FeedParserListener listener,
222 Element current ) throws Exception {
223
224 if ( ! (listener instanceof ContentFeedParserListener) )
225 return;
226
227 ContentFeedParserListener clistener = (ContentFeedParserListener)listener;
228
229 JDOMXPath xpath = new JDOMXPath( "atom:content" );
230 xpath.setNamespaceContext( NS.context );
231
232 List items = xpath.selectNodes( current );
233
234 Iterator i = items.iterator();
235
236 //update items.
237 while ( i.hasNext() ) {
238
239 Element content = (Element)i.next();
240
241 doLocale( state, listener, content );
242
243 String type = content.getAttributeValue( "type", "text/plain" );
244 String mode = content.getAttributeValue( "mode" );
245
246 String format = null;
247 String encoding = null;
248
249 String value = null;
250
251 //
252 if ( "xml".equals( mode ) ) {
253 value = content.getText();
254 } else if ( "escaped".equals( mode ) ) {
255
256 //need to decode the content here < -> < etc.
257 value = getXMLOfContent( content.getContent() );
258 value = EntityDecoder.decode( value );
259 } else {
260 mode = "xml";
261 value = getXMLOfContent( content.getContent() );
262 }
263
264 boolean isSummary = false;
265
266 clistener.onContent( state, type, format, encoding, mode, value, isSummary );
267
268 doLocaleEnd( state, listener, content );
269
270 }
271
272 xpath = new JDOMXPath( "atom:summary[@type='application/xhtml+xml']" );
273 xpath.setNamespaceContext( NS.context );
274 Element e = (Element)xpath.selectSingleNode( current );
275
276 if ( e != null ) {
277
278 String type = "text/html";
279 String format = "application/xhtml+xml";
280 String encoding = null;
281 String mode = "xml";
282
283 //FIXME: get xml:base to expand the URIs.
284
285 String value = getXMLOfContent( e );
286 boolean isSummary = true;
287
288 clistener.onContent( state, type, format, encoding, mode, value, isSummary );
289
290 }
291
292 }
293
294 private static String getXMLOfContent( Element element ) {
295 return getXMLOfContent( element.getContent() );
296 }
297
298 /**
299 * Get the content of the given element.
300 *
301 *
302 */
303 private static String getXMLOfContent( List content ) {
304
305 //NOTE: Fri Mar 04 2005 03:59 PM (burton1@rojo.com): in my profiling I
306 //found that this is a BIG memory allocater. FIXME: We SHOULD be able
307 //to do the same thing we do for xhtml:body RIGHT?
308
309 StringBuffer buff = new StringBuffer( 10000 );
310
311 // NOTE: Changed this constructor to use the default Format. Since the
312 // constructor used no longer exists in jdom 1.0.
313 XMLOutputter outputter = new XMLOutputter();
314
315 Iterator it = content.iterator();
316
317 while ( it.hasNext() ) {
318
319 Object next = it.next();
320
321 if ( next instanceof String ) {
322 buff.append( (String)next );
323 } else if ( next instanceof Element ) {
324 buff.append( outputter.outputString( (Element)next ) );
325 } else if ( next instanceof CDATA ) {
326 buff.append( ((CDATA)next).getText() );
327 } else if ( next instanceof Comment ) {
328 buff.append( outputter.outputString( (Comment)next ) );
329 } else if ( next instanceof Text ) {
330 buff.append( outputter.outputString( (Text)next ) );
331 }
332
333 }
334
335 return buff.toString();
336
337 }
338
339 private static void doMeta( FeedParserState state,
340 FeedParserListener listener,
341 Element element ) throws Exception {
342
343 //FIXME: move this code to MetaFeedParser...
344
345 if ( ! (listener instanceof MetaFeedParserListener) )
346 return;
347
348 MetaFeedParserListener mlistener = (MetaFeedParserListener)listener;
349
350 //handle issued, created, and then dublin core..
351 String subject = selectText( "dc:subject", element);
352
353 if ( subject != null ) {
354 mlistener.onSubject( state, subject );
355 mlistener.onSubjectEnd();
356 }
357
358 }
359
360 private static Element selectSingleElement( String query, org.jdom.Document doc ) throws Exception {
361
362 JDOMXPath xpath = new JDOMXPath( query );
363 xpath.setNamespaceContext( NS.context );
364
365 //perform onChannel method... (title, link, description)
366 return (Element)xpath.selectSingleNode( doc );
367
368 }
369
370 private static String selectSingleAttribute( String query, Element element ) throws Exception {
371
372 JDOMXPath xpath = new JDOMXPath( query );
373 xpath.setNamespaceContext( NS.context );
374
375 //perform onChannel method... (title, link, description)
376 Attribute a = (Attribute)xpath.selectSingleNode( element );
377 if ( a == null )
378 return null;
379
380 return a.getValue();
381
382 }
383
384 }
385