/*
 * Copyright 1999,2004 The Apache Software Foundation.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.feedparser.locate.blogservice;

import java.net.MalformedURLException;
import java.util.regex.*;

import org.apache.commons.feedparser.FeedParserException;
import org.apache.commons.feedparser.locate.*;

/**
 * Models the Blosxom blog service, encapsulating whether a given weblog
 * is this type of service and where it usually keeps its feeds.
 *
 * @author Brad Neuberg, bkn3@columbia.edu
 */
public class Blosxom extends BlogService {

    /**
     * A pattern used to discover Blosxom blogs. It matches, ignoring case,
     * the <code>alt</code> text of the "powered by blosxom" image.
     * Compiled once and never reassigned.
     */
    private static final Pattern BLOSXOM_PATTERN =
        Pattern.compile("alt=[\"' ]powered by blosxom[\"' ]",
                        Pattern.CASE_INSENSITIVE);

    /**
     * Returns whether we can trust the results of this blog service's
     * autodiscovery links. For example, TextAmerica returns invalid
     * autodiscovery results.
     *
     * @return always <code>true</code> for Blosxom.
     */
    public boolean hasValidAutoDiscovery() {
        return true;
    }

    /**
     * Returns whether we should follow HTTP redirects for this blog service.
     * Some services don't implement HTTP redirects correctly, while others,
     * like Xanga, require it.
     *
     * @return always <code>false</code> for Blosxom.
     */
    public boolean followRedirects() {
        return false;
    }

    /**
     * Determines if the weblog at the given resource and with the given
     * content is this blog service.
     *
     * @param resource A full URI to this resource, such as
     * "http://www.codinginparadise.org". Not inspected by this
     * implementation.
     * @param content The full HTML content at the resource's URL. If
     * <code>null</code>, there is nothing to inspect and the result is
     * <code>false</code>.
     * @return <code>true</code> if the content contains the
     * "powered by blosxom" <code>alt</code> text, <code>false</code>
     * otherwise.
     * @throws FeedParserException Thrown if an error occurs while
     * determining the type of this weblog.
     */
    public boolean isThisService(String resource, String content)
                                                throws FeedParserException {
        // Without content there is no markup to search; report "not Blosxom"
        // instead of failing with a NullPointerException in the matcher.
        if (content == null) {
            return false;
        }

        // This is the only kind of blog that we need to check for a
        // 'Powered by Blosxom'.  We do this with the alt= value on the
        // Powered By image.
        // FIXME: This might be fragile, but it is used across all of the
        // Blosxom blogs I have looked at so far. Brad Neuberg, bkn3@columbia.edu
        return BLOSXOM_PATTERN.matcher(content).find();
    }

    /**
     * Returns an array of FeedReferences that contains information on the
     * usual locations this blog service contains its feed.  The feeds should
     * be ordered by quality, so that higher quality feeds come before lower
     * quality ones (i.e. you would want to have an Atom FeedReference
     * object come before an RSS 0.91 FeedReference object in this list).
     *
     * @param resource A URL to the given weblog that might be used to build
     * up where feeds are usually located. Not used by this implementation.
     * @param content The full content of the resource URL, which might
     * be useful to determine where feeds are usually located.  This can be
     * null. Not used by this implementation.
     * @return a newly allocated single-element array referencing
     * "index.rss" with the RSS media type.
     * @throws FeedParserException Thrown if an error occurs while trying
     * to determine the usual locations of feeds for this service.
     */
    public FeedReference[] getFeedLocations(String resource,
                                            String content)
                                                throws FeedParserException {
        // there is sometimes an index.rss20 file, but Blosxom has a bug where
        // it incorrectly responds to HTTP HEAD requests for that file,
        // saying that it exists when it doesn't.  Most sites don't seem
        // to have this file so we don't include it here.
        // Brad Neuberg, bkn3@columbia.edu
        return new FeedReference[] {
            new FeedReference("index.rss", FeedReference.RSS_MEDIA_TYPE)
        };
    }

    /**
     * Takes a resource, such as
     * "http://www.codinginparadise.org/myweblog.php?entry=1#top", and
     * returns the path used to build up a feed location. Any fragment
     * ("#...") and query string ("?...") are removed and a trailing slash
     * is appended if there is not one already.
     *
     * Unlike the general description of this operation, this Blosxom
     * implementation does not remove a file name at the end of the path.
     * Some Blosxom blogs live inside of a cgi-script and 'hang' their RSS
     * files off of this cgi-bin.  For example,
     * http://www.bitbucketheaven.com/cgi-bin/blosxom.cgi has its RSS file
     * at http://www.bitbucketheaven.com/cgi-bin/blosxom.cgi/index.rss, so
     * the blosxom.cgi at the end must be kept in the returned path.
     *
     * This implementation performs plain string operations only; it does
     * not parse the URL.
     *
     * @param resource The URL of the weblog; must not be null.
     * @return the resource without fragment and query string, always
     * ending in "/".
     */
    public String getBaseFeedPath( String resource ) {

        // A fragment starts at the FIRST '#'; cut it off before looking for
        // the query so that a '?' inside the fragment is never considered.
        int cut = resource.indexOf( '#' );

        if ( cut != -1 ) {
            resource = resource.substring( 0, cut );
        }

        // Likewise the query string starts at the FIRST '?'; everything
        // after it (including further '?' characters) belongs to the query.
        cut = resource.indexOf( '?' );

        if ( cut != -1 ) {
            resource = resource.substring( 0, cut );
        }

        if ( ! resource.endsWith( "/" ) ) {
            resource = resource + "/";
        }

        return resource;
    }
}