AtomFeedParser xref

View Javadoc

1   /*
2    * Copyright 1999,2004 The Apache Software Foundation.
3    * 
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * 
8    *      http://www.apache.org/licenses/LICENSE-2.0
9    * 
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  package org.apache.commons.feedparser;
18  
19  import java.util.Iterator;
20  import java.util.List;
21  
22  import org.apache.commons.feedparser.locate.EntityDecoder;
23  import org.jaxen.jdom.JDOMXPath;
24  import org.jdom.Attribute;
25  import org.jdom.CDATA;
26  import org.jdom.Comment;
27  import org.jdom.Element;
28  import org.jdom.Text;
29  import org.jdom.output.XMLOutputter;
30  
31  /**
32   * http://www.intertwingly.net/wiki/pie/FrontPage
33   *  
34   * http://www.ietf.org/internet-drafts/draft-ietf-atompub-format-05.txt
35   * 
36   * http://www.ietf.org/internet-drafts/draft-ietf-atompub-format-04.txt
37   * 
38   * http://www.mnot.net/drafts/draft-nottingham-atom-format-02.html
39   * 
40   * http://www.ietf.org/html.charters/atompub-charter.html
41   * 
42   * http://www.ietf.org/internet-drafts/draft-ietf-atompub-format-01.txt
43   * 
44   * @author <a href="mailto:burton@apache.org">Kevin A. Burton (burtonator)</a>
45   * @version $Id: AtomFeedParser.java 373614 2006-01-30 22:31:21Z mvdb $
46   */
47  public class AtomFeedParser extends BaseParser {
48  
49      /**
50       * Parse this feed.
51       *
52       * 
53       */
54      public static void parse( FeedParserListener listener,
55                                org.jdom.Document doc ) throws Exception {
56  
57          FeedParserState state = new FeedParserState( listener );
58  
59          FeedVersion v = new FeedVersion();
60          v.isAtom = true;
61          listener.onFeedVersion( v );
62  
63          listener.init();
64  
65          Element root = doc.getRootElement();
66  
67          doLocale( state, listener, root );
68          
69          doChannel( state, listener, doc );
70          doEntry( state, listener, doc );
71  
72          doLocaleEnd( state, listener, root );
73  
74          listener.finished();
75  
76      }
77  
78      private static void doChannel( FeedParserState state,
79                                     FeedParserListener listener,
80                                     org.jdom.Document doc ) throws Exception {
81  
82          Element root = doc.getRootElement();
83  
84          //perform onChannel method...  (title, link, description)
85          String title = selectText( "/atom:feed/atom:title", root );
86  
87          //xpath = new XPath( "/atom:feed/atom:link[atom:rel='alternate']" );
88          
89          //perform onChannel method...  (title, link, description)
90          String link = selectSingleAttribute( "/atom:feed/atom:link[@rel='alternate'][@type='text/html']/@href", root );
91  
92          //String description = selectText( "/atom:feed/atom:summary[@rel='text/plain']", doc );
93  
94          String tagline = selectText( "/atom:feed/atom:tagline", root );
95          
96          //state.current = title;
97          listener.onChannel( state, title, link, tagline );
98  
99          listener.onChannelEnd();
100 
101     }
102 
103     private static void doEntry( FeedParserState state,
104                                  FeedParserListener listener,
105                                  org.jdom.Document doc ) throws Exception {
106 
107         JDOMXPath xpath = new JDOMXPath( "/atom:feed/atom:entry" );
108         xpath.setNamespaceContext( NS.context );
109 
110         List items = xpath.selectNodes( doc );
111 
112         Iterator i = items.iterator();
113             
114         //update items.
115         while ( i.hasNext() ) {
116 
117             Element child = (Element)i.next();
118 
119             doLocale( state, listener, child );
120             
121             String title = selectText( "atom:title", child );
122 
123             // The "atom:link" element is a Link construct that conveys a URI
124             // associated with the entry. The nature of the relationship as well
125             // as the link itself is determined by the element's content.
126 
127             // atom:entry elements MUST contain at least one atom:link element
128             // with a rel attribute value of "alternate".
129 
130             // atom:entry elements MUST NOT contain more than one atom:link
131             // element with a rel attribute value of "alternate" that has the
132             // same type attribute value.
133 
134             // atom:entry elements MAY contain additional atom:link elements
135             // beyond those described above.
136             
137             String link = selectSingleAttribute( "atom:link[@rel='alternate'][@type='text/html']/@href",
138                                                  child );
139 
140             // The "atom:summary" element is a Content construct that conveys a
141             // short summary, abstract or excerpt of the entry. atom:entry
142             // elements MAY contain an atom:created element, but MUST NOT
143             // contain more than one.
144 
145             //FIXME: what if there is no type attribute specified?  Whats the default?
146 
147             // Content constructs MAY have a "type" attribute, whose value
148             // indicates the media type of the content.  When present, this
149             // attribute's value MUST be a media type [RFC2045].  If this
150             // attribute is not present, processors MUST behave as if it were
151             // present with a value of "text/ plain".
152 
153             String description = null;
154 
155             Element summary = child.getChild( "summary", NS.ATOM );
156 
157             if ( summary != null ) {
158 
159                 String type = summary.getAttributeValue( "type", NS.ATOM );
160                 
161                 if ( type == null || "text/plain".equals( type ) )
162                     description = summary.getText();
163                 
164             }
165 
166             state.current = child;
167             
168             listener.onItem( state, title, link, description, link );
169             
170             doLink( state, listener, child );
171             
172             doMeta( state, listener, child );
173 
174             doContent( state, listener, child );
175 
176             MetaFeedParser.parse( listener, state );
177             TagFeedParser.parse( listener, state );
178             
179             listener.onItemEnd();
180             doLocale( state, listener, child );
181 
182         }
183 
184     }
185 
186     private static void doLink( FeedParserState state,
187                                 FeedParserListener listener,
188                                 Element current ) throws Exception {
189 
190         if ( listener instanceof LinkFeedParserListener == false )
191             return;
192 
193         LinkFeedParserListener lfpl = (LinkFeedParserListener)listener;
194         
195         JDOMXPath xpath = new JDOMXPath( "atom:link" );
196         xpath.setNamespaceContext( NS.context );
197 
198         List items = xpath.selectNodes( current );
199 
200         Iterator it = items.iterator();
201             
202         //update items.
203         while ( it.hasNext() ) {
204 
205             Element link = (Element)it.next();
206             
207             String href = link.getAttributeValue( "href" );
208             String rel = link.getAttributeValue( "rel" );
209             String type = link.getAttributeValue( "type" );
210 
211             String title = null;
212             long length = -1;
213 
214             lfpl.onLink( state, rel, type, href, title, length );
215             
216         }
217         
218     }
219     
220     private static void doContent( FeedParserState state,
221                                    FeedParserListener listener,
222                                    Element current ) throws Exception {
223 
224         if ( ! (listener instanceof ContentFeedParserListener) )
225             return;
226         
227         ContentFeedParserListener clistener = (ContentFeedParserListener)listener;
228 
229         JDOMXPath xpath = new JDOMXPath( "atom:content" );
230         xpath.setNamespaceContext( NS.context );
231 
232         List items = xpath.selectNodes( current );
233 
234         Iterator i = items.iterator();
235             
236         //update items.
237         while ( i.hasNext() ) {
238 
239             Element content = (Element)i.next();
240 
241             doLocale( state, listener, content );
242 
243             String type = content.getAttributeValue( "type", "text/plain" );
244             String mode = content.getAttributeValue( "mode" );
245 
246             String format = null;
247             String encoding = null;
248 
249             String value = null;
250 
251             //
252             if ( "xml".equals( mode ) ) {
253                 value = content.getText();
254             } else if ( "escaped".equals( mode ) ) {
255 
256                 //need to decode the content here &lt; -> < etc.
257                 value = getXMLOfContent( content.getContent() );
258                 value = EntityDecoder.decode( value );
259             } else {
260                 mode = "xml";
261                 value = getXMLOfContent( content.getContent() );
262             }
263 
264             boolean isSummary = false;
265             
266             clistener.onContent( state, type, format, encoding, mode, value, isSummary );
267 
268             doLocaleEnd( state, listener, content );
269             
270         }
271 
272         xpath = new JDOMXPath( "atom:summary[@type='application/xhtml+xml']" );
273         xpath.setNamespaceContext( NS.context );
274         Element e = (Element)xpath.selectSingleNode( current );
275 
276         if ( e != null ) {
277 
278             String type = "text/html";
279             String format = "application/xhtml+xml";
280             String encoding = null;
281             String mode = "xml";
282 
283             //FIXME: get xml:base to expand the URIs.
284             
285             String value = getXMLOfContent( e );
286             boolean isSummary = true;
287             
288             clistener.onContent( state, type, format, encoding, mode, value, isSummary );
289 
290         }
291         
292     }
293 
294     private static String getXMLOfContent( Element element ) {
295         return getXMLOfContent( element.getContent() );
296     }
297     
298     /**
299      * Get the content of the given element.
300      *
301      * 
302      */
303     private static String getXMLOfContent( List content ) {
304 
305         //NOTE: Fri Mar 04 2005 03:59 PM (burton1@rojo.com): in my profiling I
306         //found that this is a BIG memory allocater.  FIXME: We SHOULD be able
307         //to do the same thing we do for xhtml:body RIGHT?
308         
309         StringBuffer buff = new StringBuffer( 10000 ); 
310 
311         // NOTE: Changed this constructor to use the default Format. Since the
312         // constructor used no longer exists in jdom 1.0.
313         XMLOutputter outputter = new XMLOutputter();
314 
315         Iterator it = content.iterator();
316         
317         while ( it.hasNext() ) {
318 
319             Object next = it.next();
320             
321             if ( next instanceof String ) {
322                 buff.append( (String)next );
323             } else if ( next instanceof Element ) {
324                 buff.append( outputter.outputString( (Element)next ) );
325             } else if ( next instanceof CDATA ) {
326                 buff.append( ((CDATA)next).getText() );
327             } else if ( next instanceof Comment ) {
328                 buff.append( outputter.outputString( (Comment)next ) );
329             } else if ( next instanceof Text ) {
330                 buff.append( outputter.outputString( (Text)next ) );
331             } 
332 
333         } 
334 
335         return buff.toString();
336         
337     }
338 
339     private static void doMeta( FeedParserState state,
340                                 FeedParserListener listener,
341                                 Element element ) throws Exception {
342 
343         //FIXME: move this code to MetaFeedParser...
344         
345         if ( ! (listener instanceof MetaFeedParserListener) ) 
346             return;
347 
348         MetaFeedParserListener mlistener = (MetaFeedParserListener)listener;
349 
350         //handle issued, created, and then dublin core..
351         String subject = selectText( "dc:subject", element);
352 
353         if ( subject != null ) {
354             mlistener.onSubject( state, subject );
355             mlistener.onSubjectEnd();
356         } 
357 
358     }
359 
360     private static Element selectSingleElement( String query, org.jdom.Document doc ) throws Exception {
361 
362         JDOMXPath xpath = new JDOMXPath( query );
363         xpath.setNamespaceContext( NS.context );
364         
365         //perform onChannel method...  (title, link, description)
366         return (Element)xpath.selectSingleNode( doc );
367 
368     }
369 
370     private static String selectSingleAttribute( String query, Element element ) throws Exception {
371 
372         JDOMXPath xpath = new JDOMXPath( query );
373         xpath.setNamespaceContext( NS.context );
374         
375         //perform onChannel method...  (title, link, description)
376         Attribute a = (Attribute)xpath.selectSingleNode( element );
377         if ( a == null )
378             return null;
379         
380         return a.getValue();
381 
382     }
383 
384 }
385