FeedLocator xref

View Javadoc

1   /*
2    * Copyright 1999,2004 The Apache Software Foundation.
3    * 
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * 
8    *      http://www.apache.org/licenses/LICENSE-2.0
9    * 
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  package org.apache.commons.feedparser.locate;
18  
19  import java.util.Iterator;
20  
21  import org.apache.commons.feedparser.FeedList;
22  import org.apache.commons.feedparser.network.ResourceRequest;
23  import org.apache.commons.feedparser.network.ResourceRequestFactory;
24  import org.apache.log4j.Logger;
25  
26  /**
27   * Method to determine feed URLs from a given resource URI.  For example,
28   * you would pass in the URI:
29   * 
30   * http://www.codinginparadise.org
31   * 
32   * and this class would pass back a List with one address of the feed URL,
33   * which is
34   * 
35   * http://www.codinginparadise.org/weblog/atom.xml
36   *
37   * <code>
38   * String resource = "http://www.codinginparadise.org";
39   * FeedList l = FeedLocator.locate( resource );
40   * </code>
41   * 
42   * @author <a href="mailto:burton@apache.org">Kevin A. Burton</a>
43   */
44  public class FeedLocator {
45  
46      private static Logger log = Logger.getLogger( FeedLocator.class );
47      
48      /**
49       * Locate all feeds within the given resource.  The resource should be a link
50       * to an (X)HTML document, usually a weblog or a website.
51       * 
52       * Example: http://peerfear.org
53       *
54       * @param resource The weblog we need to discover
55       * 
56       */
57      public static final FeedList locate( String resource ) throws Exception {
58          // \: Use my network library when it's migrated into Apache.
59          
60          //fetch content
61          ResourceRequest request = ResourceRequestFactory.getResourceRequest( resource );
62  
63          String content = request.getInputStreamAsString();
64  
65          //return resources
66          return locate( resource, content );
67          
68      }
69  
70      /**
71       * Locate the feed with the given content.
72       *
73       * 
74       */
75      public static final FeedList locate( String resource, String content ) throws Exception {
76  
77          log.info( "Locating " + resource + "..." );
78          
79          FeedList list = new FeedList();
80  
81          //FIXME: if we were GIVEN an RSS/Atom/OPML/etc file then we should just
82          //attempt to use this and return a FeedList with just one entry.  Parse
83          //it first I think to make sure its valid XML and then move forward.
84          //The downside here is that it would be wasted CPU if its HTML content.
85          
86          log.debug( "Using DiscoveryLocator..." );
87          DiscoveryLocator.locate( resource, content, list );
88          log.debug("after discoverylocator, list="+list);
89  
90          log.debug( "Using LinkLocator..." );
91          //this failed... try looking for links
92          LinkLocator.locate( resource, content, list );
93          log.debug("after linklocator, list="+list);
94  
95          //this failed... try probe location.  This is more reliable than
96          //LinkLocation but requires a few more HTTP gets.
97          log.debug( "Using ProbeLocator..." );
98          ProbeLocator.locate( resource, content, list );
99          log.debug("after probelocator, list="+list);
100         
101         log.info( "After locating, list="+list );
102             
103         return list;
104         
105     }
106 
107     public static void main( String[] args ) throws Exception {
108 
109         //This should find http://www.electoral-vote.com/index.rss
110         //String resource = "http://brendonwilson.com/";
111 
112         //String resource = "file:///projects/feedparser/tests/locate4.html";
113         //String resource = "file:///projects/feedparser/tests/locate5.html";
114         //String resource = "file:///projects/feedparser/tests/locate6.html";
115 
116         //FIXME: add UNIT TESTS for Yahoo Groups and Flickr
117 
118         String resource = "http://craigslist.org/w4m/";
119         
120         //String resource = "http://groups.yahoo.com/group/aggregators/";
121 
122         //String resource = "http://flickr.com/photos/tags/cats";
123 
124         //String resource = "file:///projects/feedparser/tests/locate8.html";
125 
126         //String resource = "http://blogs.sun.com/roller/page/gonzo";
127 
128         //String resource = "http://gonze.com/weblog/";
129 
130         //String resource = "http://codinginparadise.org/";
131 
132         //        String resource = "http://bucsfishingreport.com/pMachine/weblog.php";
133         
134         //String resource = "http://www.livejournal.com/community/indiexiankids/";
135 //String resource= "http://www.thealarmclock.com/mt/";
136         
137         //String resource = "http://guinness.joeuser.com";
138         
139         //String resource = "http://georgewbush.com/blog";
140 
141         //String resource = "http://carolinascl.blogspot.com/";
142         
143         //String resource = "http://www.corante.com/strange/";
144         //String resource = "http://peerfear.org";
145 
146         ProbeLocator.BLOG_SERVICE_PROBING_ENABLED = true;
147         ProbeLocator.AGGRESIVE_PROBING_ENABLED = true;
148 
149         FeedList l = locate( resource );
150 
151         Iterator it = l.iterator();
152 
153         if ( it.hasNext() == false ) {
154             System.out.println( "NO LINKS FOUND" );
155         } 
156 
157         System.out.println( "AD RSS: " + l.getAdRSSFeed() );
158         System.out.println( "AD Atom: " + l.getAdAtomFeed() );
159         
160         while ( it.hasNext() ) {
161 
162             FeedReference ref = (FeedReference)it.next();
163 
164             System.out.println( ref.resource );
165             
166         }
167 
168     }
169 
170 }