1 /*
2 * Copyright 1999,2004 The Apache Software Foundation.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 package org.apache.commons.feedparser.locate.blogservice;
18
19 import java.net.MalformedURLException;
20 import java.util.regex.*;
21
22 import org.apache.commons.feedparser.FeedParserException;
23 import org.apache.commons.feedparser.locate.*;
24
25 /**
26 * Models the Blosxom blog service, encapsulating whether a given weblog
27 * is this type of service and where it usually keeps its feeds.
28 *
29 * @author Brad Neuberg, bkn3@columbia.edu
30 */
31 public class Blosxom extends BlogService {
32
33 /** A pattern used to discover Blosxom blogs. */
34 private static Pattern blosxomPattern =
35 Pattern.compile("alt=[\"' ]powered by blosxom[\"' ]",
36 Pattern.CASE_INSENSITIVE);
37
38 /** Returns whether we can trust the results of this blog service's
39 * autodiscovery links. For example, TextAmerica returns invalid
40 * autodiscovery results.
41 */
42 public boolean hasValidAutoDiscovery() {
43 return true;
44 }
45
46 /** Returns whether we should follow HTTP redirects for this blog service.
47 * Some services don't implement HTTP redirects correctly, while others,
48 * like Xanga, require it.
49 */
50 public boolean followRedirects() {
51 return false;
52 }
53
54 /** Determines if the weblog at the given resource and with the given
55 * content is this blog service.
56 * @param resource A full URI to this resource, such as
57 * "http://www.codinginparadise.org".
58 * @param content The full HTML content at the resource's URL.
59 * @throws FeedParserException Thrown if an error occurs while
60 * determining the type of this weblog.
61 */
62 public boolean isThisService(String resource, String content)
63 throws FeedParserException {
64 boolean results = false;
65
66 // This is the only kind of blog that we need to check for a
67 // 'Powered by Blosxom'. We do this with the alt= value on the
68 // Powered By image.
69 // FIXME: This might be fragile, but it is used across all of the
70 // Blosxom blogs I have looked at so far. Brad Neuberg, bkn3@columbia.edu
71
72 Matcher blosxomMatcher = blosxomPattern.matcher(content);
73 results = blosxomMatcher.find();
74
75 return results;
76 }
77
78 /**
79 * Returns an array of FeedReferences that contains information on the
80 * usual locations this blog service contains its feed. The feeds should
81 * be ordered by quality, so that higher quality feeds come before lower
82 * quality ones (i.e. you would want to have an Atom FeedReference
83 * object come before an RSS 0.91 FeedReference object in this list).
84 * @param resource A URL to the given weblog that might be used to build
85 * up where feeds are usually located.
86 * @param content The full content of the resource URL, which might
87 * be useful to determine where feeds are usually located. This can be
88 * null.
89 * @throws FeedParserException Thrown if an error occurs while trying
90 * to determine the usual locations of feeds for this service.
91 */
92 public FeedReference[] getFeedLocations(String resource,
93 String content)
94 throws FeedParserException {
95 // there is sometimes an index.rss20 file, but Blosxom has a bug where
96 // it incorrectly responds to HTTP HEAD requests for that file,
97 // saying that it exists when it doesn't. Most sites don't seem
98 // to have this file so we don't include it here.
99 // Brad Neuberg, bkn3@columbia.edu
100 FeedReference[] blosxomLocations =
101 { new FeedReference("index.rss", FeedReference.RSS_MEDIA_TYPE) };
102
103 return blosxomLocations;
104 }
105
106 /** This method takes a resource, such as "http://www.codinginparadise.org/myweblog.php" target="alexandria_uri">http://www.codinginparadise.org/myweblog.php",
107 * and gets the path necessary to build up a feed, such as
108 * "http://www.codinginparadise.org/". Basicly it appends a slash
109 * to the end if there is not one, and removes any file names that
110 * might be at the end, such as "myweblog.php".
111 *
112 * There is a special exception for some Blosxom blogs,
113 * which have things inside of a cgi-script and 'hang' their RSS files
114 * off of this cgi-bin. For example,
115 * http://www.bitbucketheaven.com/cgi-bin/blosxom.cgi has its RSS file
116 * at http://www.bitbucketheaven.com/cgi-bin/blosxom.cgi/index.rss, so
117 * we must return the blosxom.cgi at the end as well for this method.
118 *
119 * @throws MalformedURLException Thrown if the given resource's URL is
120 * incorrectly formatted.
121 */
122 public String getBaseFeedPath( String resource ) {
123
124 // strip off any query string or anchors
125 int end = resource.lastIndexOf( "#" );
126
127 if ( end != -1 )
128 resource = resource.substring( 0, end );
129
130 end = resource.lastIndexOf( "?" );
131
132 if ( end != -1 )
133 resource = resource.substring( 0, end );
134
135 if ( ! resource.endsWith( "/" ) ) {
136 resource = resource + "/";
137 }
138
139 return resource;
140 }
141 }