View Javadoc

1   /*
2    * Copyright 1999,2004 The Apache Software Foundation.
3    * 
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * 
8    *      http://www.apache.org/licenses/LICENSE-2.0
9    * 
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  package org.apache.commons.feedparser.locate.blogservice;
18  
19  import java.net.MalformedURLException;
20  import java.util.regex.*;
21  
22  import org.apache.commons.feedparser.FeedParserException;
23  import org.apache.commons.feedparser.locate.*;
24  
25  /**
26   * Models the Blosxom blog service, encapsulating whether a given weblog
27   * is this type of service and where it usually keeps its feeds.
28   * 
29   * @author Brad Neuberg, bkn3@columbia.edu
30   */
31  public class Blosxom extends BlogService {
32      
33      /** A pattern used to discover Blosxom blogs. */
34      private static Pattern blosxomPattern =
35                  Pattern.compile("alt=[\"' ]powered by blosxom[\"' ]",
36                                  Pattern.CASE_INSENSITIVE);
37          
38      /** Returns whether we can trust the results of this blog service's 
39       *  autodiscovery links.  For example, TextAmerica returns invalid 
40       *  autodiscovery results.
41       */
42      public boolean hasValidAutoDiscovery() {
43          return true;
44      }
45      
46      /** Returns whether we should follow HTTP redirects for this blog service.
47       *  Some services don't implement HTTP redirects correctly, while others,
48       *  like Xanga, require it.
49       */
50      public boolean followRedirects() {
51          return false;
52      }
53      
54      /** Determines if the weblog at the given resource and with the given
55       *  content is this blog service.
56       * @param resource A full URI to this resource, such as 
57       * "http://www.codinginparadise.org".
58       * @param content The full HTML content at the resource's URL.
59       * @throws FeedParserException Thrown if an error occurs while 
60       * determining the type of this weblog.
61       */
62      public boolean isThisService(String resource, String content)
63                                                  throws FeedParserException {
64          boolean results = false;
65          
66          // This is the only kind of blog that we need to check for a 
67          // 'Powered by Blosxom'.  We do this with the alt= value on the
68          // Powered By image.
69          // FIXME: This might be fragile, but it is used across all of the
70          // Blosxom blogs I have looked at so far. Brad Neuberg, bkn3@columbia.edu
71          
72          Matcher blosxomMatcher = blosxomPattern.matcher(content);
73          results = blosxomMatcher.find();
74          
75          return results;
76      }
77  
78      /**
79       * Returns an array of FeedReferences that contains information on the
80       * usual locations this blog service contains its feed.  The feeds should
81       * be ordered by quality, so that higher quality feeds come before lower
82       * quality ones (i.e. you would want to have an Atom FeedReference
83       * object come before an RSS 0.91 FeedReference object in this list).
84       * @param resource A URL to the given weblog that might be used to build
85       * up where feeds are usually located.
86       * @param content The full content of the resource URL, which might
87       * be useful to determine where feeds are usually located.  This can be
88       * null.
89       * @throws FeedParserException Thrown if an error occurs while trying
90       * to determine the usual locations of feeds for this service.
91       */
92      public FeedReference[] getFeedLocations(String resource,
93                                              String content)
94                                                  throws FeedParserException {
95          // there is sometimes an index.rss20 file, but Blosxom has a bug where
96          // it incorrectly responds to HTTP HEAD requests for that file,
97          // saying that it exists when it doesn't.  Most sites don't seem
98          // to have this file so we don't include it here. 
99          // Brad Neuberg, bkn3@columbia.edu
100         FeedReference[] blosxomLocations = 
101             { new FeedReference("index.rss", FeedReference.RSS_MEDIA_TYPE) };
102         
103         return blosxomLocations;
104     }
105     
106     /** This method takes a resource, such as "http://www.codinginparadise.org/myweblog.php" target="alexandria_uri">http://www.codinginparadise.org/myweblog.php",
107      *  and gets the path necessary to build up a feed, such as 
108      *  "http://www.codinginparadise.org/".  Basicly it appends a slash 
109      *  to the end if there is not one, and removes any file names that 
110      *  might be at the end, such as "myweblog.php".
111      *
112      *  There is a special exception for some Blosxom blogs,
113      *  which have things inside of a cgi-script and 'hang' their RSS files
114      *  off of this cgi-bin.  For example, 
115      *  http://www.bitbucketheaven.com/cgi-bin/blosxom.cgi has its RSS file
116      *  at http://www.bitbucketheaven.com/cgi-bin/blosxom.cgi/index.rss, so
117      *  we must return the blosxom.cgi at the end as well for this method.
118      * 
119      *  @throws MalformedURLException Thrown if the given resource's URL is 
120      *  incorrectly formatted.
121      */
122     public String getBaseFeedPath( String resource ) {
123         
124         // strip off any query string or anchors
125         int end = resource.lastIndexOf( "#" );
126         
127         if ( end != -1 )
128             resource = resource.substring( 0, end );
129 
130         end = resource.lastIndexOf( "?" );
131 
132         if ( end != -1 )
133             resource = resource.substring( 0, end );
134         
135         if ( ! resource.endsWith( "/" ) ) {
136             resource = resource + "/";
137         }
138         
139         return resource;
140     }
141 }