001    /*
002     * Copyright 1999,2004 The Apache Software Foundation.
003     * 
004     * Licensed under the Apache License, Version 2.0 (the "License");
005     * you may not use this file except in compliance with the License.
006     * You may obtain a copy of the License at
007     * 
008     *      http://www.apache.org/licenses/LICENSE-2.0
009     * 
010     * Unless required by applicable law or agreed to in writing, software
011     * distributed under the License is distributed on an "AS IS" BASIS,
012     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013     * See the License for the specific language governing permissions and
014     * limitations under the License.
015     */
016    
017    package org.apache.commons.feedparser.locate;
018    
019    import java.util.Iterator;
020    
021    import org.apache.commons.feedparser.FeedList;
022    import org.apache.commons.feedparser.network.ResourceRequest;
023    import org.apache.commons.feedparser.network.ResourceRequestFactory;
024    import org.apache.log4j.Logger;
025    
026    /**
027     * Method to determine feed URLs from a given resource URI.  For example,
028     * you would pass in the URI:
029     * 
030     * http://www.codinginparadise.org
031     * 
032     * and this class would pass back a List with one address of the feed URL,
033     * which is
034     * 
035     * http://www.codinginparadise.org/weblog/atom.xml"
036     *
037     * <code>
038     * String resource = "http://www.codinginparadise.org";
039     * FeedList l = FeedLocator.locate( resource );
040     * </code>
041     * 
042     * @author <a href="mailto:burton@apache.org">Kevin A. Burton</a>
043     */
044    public class FeedLocator {
045    
046        private static Logger log = Logger.getLogger( FeedLocator.class );
047        
048        /**
049         * Locate all feeds within the given resource.  The resource should be a link
050         * to an (X)HTML document, usually a weblog or a website.
051         * 
052         * Example: http://peerfear.org
053         *
054         * @param resource The weblog we need to discover
055         * 
056         */
057        public static final FeedList locate( String resource ) throws Exception {
058            // \: Use my network library when it's migrated into Apache.
059            
060            //fetch content
061            ResourceRequest request = ResourceRequestFactory.getResourceRequest( resource );
062    
063            String content = request.getInputStreamAsString();
064    
065            //return resources
066            return locate( resource, content );
067            
068        }
069    
070        /**
071         * Locate the feed with the given content.
072         *
073         * 
074         */
075        public static final FeedList locate( String resource, String content ) throws Exception {
076    
077            log.info( "Locating " + resource + "..." );
078            
079            FeedList list = new FeedList();
080    
081            //FIXME: if we were GIVEN an RSS/Atom/OPML/etc file then we should just
082            //attempt to use this and return a FeedList with just one entry.  Parse
083            //it first I think to make sure its valid XML and then move forward.
084            //The downside here is that it would be wasted CPU if its HTML content.
085            
086            log.debug( "Using DiscoveryLocator..." );
087            DiscoveryLocator.locate( resource, content, list );
088            log.debug("after discoverylocator, list="+list);
089    
090            log.debug( "Using LinkLocator..." );
091            //this failed... try looking for links
092            LinkLocator.locate( resource, content, list );
093            log.debug("after linklocator, list="+list);
094    
095            //this failed... try probe location.  This is more reliable than
096            //LinkLocation but requires a few more HTTP gets.
097            log.debug( "Using ProbeLocator..." );
098            ProbeLocator.locate( resource, content, list );
099            log.debug("after probelocator, list="+list);
100            
101            log.info( "After locating, list="+list );
102                
103            return list;
104            
105        }
106    
107        public static void main( String[] args ) throws Exception {
108    
109            //This should find http://www.electoral-vote.com/index.rss
110            //String resource = "http://brendonwilson.com/";
111    
112            //String resource = "file:///projects/feedparser/tests/locate4.html";
113            //String resource = "file:///projects/feedparser/tests/locate5.html";
114            //String resource = "file:///projects/feedparser/tests/locate6.html";
115    
116            //FIXME: add UNIT TESTS for Yahoo Groups and Flickr
117    
118            String resource = "http://craigslist.org/w4m/";
119            
120            //String resource = "http://groups.yahoo.com/group/aggregators/";
121    
122            //String resource = "http://flickr.com/photos/tags/cats";
123    
124            //String resource = "file:///projects/feedparser/tests/locate8.html";
125    
126            //String resource = "http://blogs.sun.com/roller/page/gonzo";
127    
128            //String resource = "http://gonze.com/weblog/";
129    
130            //String resource = "http://codinginparadise.org/";
131    
132            //        String resource = "http://bucsfishingreport.com/pMachine/weblog.php";
133            
134            //String resource = "http://www.livejournal.com/community/indiexiankids/";
135    //String resource= "http://www.thealarmclock.com/mt/";
136            
137            //String resource = "http://guinness.joeuser.com";
138            
139            //String resource = "http://georgewbush.com/blog";
140    
141            //String resource = "http://carolinascl.blogspot.com/";
142            
143            //String resource = "http://www.corante.com/strange/";
144            //String resource = "http://peerfear.org";
145    
146            ProbeLocator.BLOG_SERVICE_PROBING_ENABLED = true;
147            ProbeLocator.AGGRESIVE_PROBING_ENABLED = true;
148    
149            FeedList l = locate( resource );
150    
151            Iterator it = l.iterator();
152    
153            if ( it.hasNext() == false ) {
154                System.out.println( "NO LINKS FOUND" );
155            } 
156    
157            System.out.println( "AD RSS: " + l.getAdRSSFeed() );
158            System.out.println( "AD Atom: " + l.getAdAtomFeed() );
159            
160            while ( it.hasNext() ) {
161    
162                FeedReference ref = (FeedReference)it.next();
163    
164                System.out.println( ref.resource );
165                
166            }
167    
168        }
169    
170    }