001    /*
002     * Copyright 1999,2004 The Apache Software Foundation.
003     * 
004     * Licensed under the Apache License, Version 2.0 (the "License");
005     * you may not use this file except in compliance with the License.
006     * You may obtain a copy of the License at
007     * 
008     *      http://www.apache.org/licenses/LICENSE-2.0
009     * 
010     * Unless required by applicable law or agreed to in writing, software
011     * distributed under the License is distributed on an "AS IS" BASIS,
012     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013     * See the License for the specific language governing permissions and
014     * limitations under the License.
015     */
016    
017    package org.apache.commons.feedparser;
018    
019    import java.util.Iterator;
020    import java.util.List;
021    
022    import org.apache.commons.feedparser.locate.EntityDecoder;
023    import org.jaxen.jdom.JDOMXPath;
024    import org.jdom.Attribute;
025    import org.jdom.CDATA;
026    import org.jdom.Comment;
027    import org.jdom.Element;
028    import org.jdom.Text;
029    import org.jdom.output.XMLOutputter;
030    
031    /**
032     * http://www.intertwingly.net/wiki/pie/FrontPage
033     *  
034     * http://www.ietf.org/internet-drafts/draft-ietf-atompub-format-05.txt
035     * 
036     * http://www.ietf.org/internet-drafts/draft-ietf-atompub-format-04.txt
037     * 
038     * http://www.mnot.net/drafts/draft-nottingham-atom-format-02.html
039     * 
040     * http://www.ietf.org/html.charters/atompub-charter.html
041     * 
042     * http://www.ietf.org/internet-drafts/draft-ietf-atompub-format-01.txt
043     * 
044     * @author <a href="mailto:burton@apache.org">Kevin A. Burton (burtonator)</a>
045     * @version $Id: AtomFeedParser.java 373614 2006-01-30 22:31:21Z mvdb $
046     */
047    public class AtomFeedParser extends BaseParser {
048    
049        /**
050         * Parse this feed.
051         *
052         * 
053         */
054        public static void parse( FeedParserListener listener,
055                                  org.jdom.Document doc ) throws Exception {
056    
057            FeedParserState state = new FeedParserState( listener );
058    
059            FeedVersion v = new FeedVersion();
060            v.isAtom = true;
061            listener.onFeedVersion( v );
062    
063            listener.init();
064    
065            Element root = doc.getRootElement();
066    
067            doLocale( state, listener, root );
068            
069            doChannel( state, listener, doc );
070            doEntry( state, listener, doc );
071    
072            doLocaleEnd( state, listener, root );
073    
074            listener.finished();
075    
076        }
077    
078        private static void doChannel( FeedParserState state,
079                                       FeedParserListener listener,
080                                       org.jdom.Document doc ) throws Exception {
081    
082            Element root = doc.getRootElement();
083    
084            //perform onChannel method...  (title, link, description)
085            String title = selectText( "/atom:feed/atom:title", root );
086    
087            //xpath = new XPath( "/atom:feed/atom:link[atom:rel='alternate']" );
088            
089            //perform onChannel method...  (title, link, description)
090            String link = selectSingleAttribute( "/atom:feed/atom:link[@rel='alternate'][@type='text/html']/@href", root );
091    
092            //String description = selectText( "/atom:feed/atom:summary[@rel='text/plain']", doc );
093    
094            String tagline = selectText( "/atom:feed/atom:tagline", root );
095            
096            //state.current = title;
097            listener.onChannel( state, title, link, tagline );
098    
099            listener.onChannelEnd();
100    
101        }
102    
103        private static void doEntry( FeedParserState state,
104                                     FeedParserListener listener,
105                                     org.jdom.Document doc ) throws Exception {
106    
107            JDOMXPath xpath = new JDOMXPath( "/atom:feed/atom:entry" );
108            xpath.setNamespaceContext( NS.context );
109    
110            List items = xpath.selectNodes( doc );
111    
112            Iterator i = items.iterator();
113                
114            //update items.
115            while ( i.hasNext() ) {
116    
117                Element child = (Element)i.next();
118    
119                doLocale( state, listener, child );
120                
121                String title = selectText( "atom:title", child );
122    
123                // The "atom:link" element is a Link construct that conveys a URI
124                // associated with the entry. The nature of the relationship as well
125                // as the link itself is determined by the element's content.
126    
127                // atom:entry elements MUST contain at least one atom:link element
128                // with a rel attribute value of "alternate".
129    
130                // atom:entry elements MUST NOT contain more than one atom:link
131                // element with a rel attribute value of "alternate" that has the
132                // same type attribute value.
133    
134                // atom:entry elements MAY contain additional atom:link elements
135                // beyond those described above.
136                
137                String link = selectSingleAttribute( "atom:link[@rel='alternate'][@type='text/html']/@href",
138                                                     child );
139    
140                // The "atom:summary" element is a Content construct that conveys a
141                // short summary, abstract or excerpt of the entry. atom:entry
142                // elements MAY contain an atom:created element, but MUST NOT
143                // contain more than one.
144    
145                //FIXME: what if there is no type attribute specified?  Whats the default?
146    
147                // Content constructs MAY have a "type" attribute, whose value
148                // indicates the media type of the content.  When present, this
149                // attribute's value MUST be a media type [RFC2045].  If this
150                // attribute is not present, processors MUST behave as if it were
151                // present with a value of "text/ plain".
152    
153                String description = null;
154    
155                Element summary = child.getChild( "summary", NS.ATOM );
156    
157                if ( summary != null ) {
158    
159                    String type = summary.getAttributeValue( "type", NS.ATOM );
160                    
161                    if ( type == null || "text/plain".equals( type ) )
162                        description = summary.getText();
163                    
164                }
165    
166                state.current = child;
167                
168                listener.onItem( state, title, link, description, link );
169                
170                doLink( state, listener, child );
171                
172                doMeta( state, listener, child );
173    
174                doContent( state, listener, child );
175    
176                MetaFeedParser.parse( listener, state );
177                TagFeedParser.parse( listener, state );
178                
179                listener.onItemEnd();
180                doLocale( state, listener, child );
181    
182            }
183    
184        }
185    
186        private static void doLink( FeedParserState state,
187                                    FeedParserListener listener,
188                                    Element current ) throws Exception {
189    
190            if ( listener instanceof LinkFeedParserListener == false )
191                return;
192    
193            LinkFeedParserListener lfpl = (LinkFeedParserListener)listener;
194            
195            JDOMXPath xpath = new JDOMXPath( "atom:link" );
196            xpath.setNamespaceContext( NS.context );
197    
198            List items = xpath.selectNodes( current );
199    
200            Iterator it = items.iterator();
201                
202            //update items.
203            while ( it.hasNext() ) {
204    
205                Element link = (Element)it.next();
206                
207                String href = link.getAttributeValue( "href" );
208                String rel = link.getAttributeValue( "rel" );
209                String type = link.getAttributeValue( "type" );
210    
211                String title = null;
212                long length = -1;
213    
214                lfpl.onLink( state, rel, type, href, title, length );
215                
216            }
217            
218        }
219        
220        private static void doContent( FeedParserState state,
221                                       FeedParserListener listener,
222                                       Element current ) throws Exception {
223    
224            if ( ! (listener instanceof ContentFeedParserListener) )
225                return;
226            
227            ContentFeedParserListener clistener = (ContentFeedParserListener)listener;
228    
229            JDOMXPath xpath = new JDOMXPath( "atom:content" );
230            xpath.setNamespaceContext( NS.context );
231    
232            List items = xpath.selectNodes( current );
233    
234            Iterator i = items.iterator();
235                
236            //update items.
237            while ( i.hasNext() ) {
238    
239                Element content = (Element)i.next();
240    
241                doLocale( state, listener, content );
242    
243                String type = content.getAttributeValue( "type", "text/plain" );
244                String mode = content.getAttributeValue( "mode" );
245    
246                String format = null;
247                String encoding = null;
248    
249                String value = null;
250    
251                //
252                if ( "xml".equals( mode ) ) {
253                    value = content.getText();
254                } else if ( "escaped".equals( mode ) ) {
255    
256                    //need to decode the content here &lt; -> < etc.
257                    value = getXMLOfContent( content.getContent() );
258                    value = EntityDecoder.decode( value );
259                } else {
260                    mode = "xml";
261                    value = getXMLOfContent( content.getContent() );
262                }
263    
264                boolean isSummary = false;
265                
266                clistener.onContent( state, type, format, encoding, mode, value, isSummary );
267    
268                doLocaleEnd( state, listener, content );
269                
270            }
271    
272            xpath = new JDOMXPath( "atom:summary[@type='application/xhtml+xml']" );
273            xpath.setNamespaceContext( NS.context );
274            Element e = (Element)xpath.selectSingleNode( current );
275    
276            if ( e != null ) {
277    
278                String type = "text/html";
279                String format = "application/xhtml+xml";
280                String encoding = null;
281                String mode = "xml";
282    
283                //FIXME: get xml:base to expand the URIs.
284                
285                String value = getXMLOfContent( e );
286                boolean isSummary = true;
287                
288                clistener.onContent( state, type, format, encoding, mode, value, isSummary );
289    
290            }
291            
292        }
293    
294        private static String getXMLOfContent( Element element ) {
295            return getXMLOfContent( element.getContent() );
296        }
297        
298        /**
299         * Get the content of the given element.
300         *
301         * 
302         */
303        private static String getXMLOfContent( List content ) {
304    
305            //NOTE: Fri Mar 04 2005 03:59 PM (burton1@rojo.com): in my profiling I
306            //found that this is a BIG memory allocater.  FIXME: We SHOULD be able
307            //to do the same thing we do for xhtml:body RIGHT?
308            
309            StringBuffer buff = new StringBuffer( 10000 ); 
310    
311            // NOTE: Changed this constructor to use the default Format. Since the
312            // constructor used no longer exists in jdom 1.0.
313            XMLOutputter outputter = new XMLOutputter();
314    
315            Iterator it = content.iterator();
316            
317            while ( it.hasNext() ) {
318    
319                Object next = it.next();
320                
321                if ( next instanceof String ) {
322                    buff.append( (String)next );
323                } else if ( next instanceof Element ) {
324                    buff.append( outputter.outputString( (Element)next ) );
325                } else if ( next instanceof CDATA ) {
326                    buff.append( ((CDATA)next).getText() );
327                } else if ( next instanceof Comment ) {
328                    buff.append( outputter.outputString( (Comment)next ) );
329                } else if ( next instanceof Text ) {
330                    buff.append( outputter.outputString( (Text)next ) );
331                } 
332    
333            } 
334    
335            return buff.toString();
336            
337        }
338    
339        private static void doMeta( FeedParserState state,
340                                    FeedParserListener listener,
341                                    Element element ) throws Exception {
342    
343            //FIXME: move this code to MetaFeedParser...
344            
345            if ( ! (listener instanceof MetaFeedParserListener) ) 
346                return;
347    
348            MetaFeedParserListener mlistener = (MetaFeedParserListener)listener;
349    
350            //handle issued, created, and then dublin core..
351            String subject = selectText( "dc:subject", element);
352    
353            if ( subject != null ) {
354                mlistener.onSubject( state, subject );
355                mlistener.onSubjectEnd();
356            } 
357    
358        }
359    
360        private static Element selectSingleElement( String query, org.jdom.Document doc ) throws Exception {
361    
362            JDOMXPath xpath = new JDOMXPath( query );
363            xpath.setNamespaceContext( NS.context );
364            
365            //perform onChannel method...  (title, link, description)
366            return (Element)xpath.selectSingleNode( doc );
367    
368        }
369    
370        private static String selectSingleAttribute( String query, Element element ) throws Exception {
371    
372            JDOMXPath xpath = new JDOMXPath( query );
373            xpath.setNamespaceContext( NS.context );
374            
375            //perform onChannel method...  (title, link, description)
376            Attribute a = (Attribute)xpath.selectSingleNode( element );
377            if ( a == null )
378                return null;
379            
380            return a.getValue();
381    
382        }
383    
384    }
385