/*
 * Copyright 1999,2004 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.feedparser.locate.blogservice;

import java.net.MalformedURLException;
import java.util.regex.*;

import org.apache.commons.feedparser.FeedParserException;
import org.apache.commons.feedparser.locate.*;

/**
 * Models the Blosxom blog service, encapsulating whether a given weblog
 * is this type of service and where it usually keeps its feeds.
 *
 * @author Brad Neuberg, bkn3@columbia.edu
 */
public class Blosxom extends BlogService {

    /**
     * Pattern used to discover Blosxom blogs: the <code>alt=</code> text of
     * the "Powered by Blosxom" image, matched case-insensitively. Compiled
     * once and never reassigned.
     */
    private static final Pattern BLOSXOM_PATTERN =
        Pattern.compile("alt=[\"' ]powered by blosxom[\"' ]",
                        Pattern.CASE_INSENSITIVE);

    /**
     * Returns whether we can trust the results of this blog service's
     * autodiscovery links. For example, TextAmerica returns invalid
     * autodiscovery results.
     *
     * @return always <code>true</code> for Blosxom.
     */
    public boolean hasValidAutoDiscovery() {
        return true;
    }

    /**
     * Returns whether we should follow HTTP redirects for this blog service.
     * Some services don't implement HTTP redirects correctly, while others,
     * like Xanga, require it.
     *
     * @return always <code>false</code> for Blosxom.
     */
    public boolean followRedirects() {
        return false;
    }

    /**
     * Determines if the weblog at the given resource and with the given
     * content is this blog service.
     *
     * @param resource A full URI to this resource, such as
     * "http://www.codinginparadise.org". Not inspected by this
     * implementation.
     * @param content The full HTML content at the resource's URL. If
     * <code>null</code>, there is nothing to match against and
     * <code>false</code> is returned.
     * @return <code>true</code> if the content contains the
     * "Powered by Blosxom" marker, <code>false</code> otherwise.
     * @throws FeedParserException Thrown if an error occurs while
     * determining the type of this weblog.
     */
    public boolean isThisService(String resource, String content)
        throws FeedParserException {

        // Without any content there is no marker to find; report "not
        // Blosxom" rather than failing with a NullPointerException.
        if (content == null) {
            return false;
        }

        // This is the only kind of blog that we need to check for a
        // 'Powered by Blosxom'. We do this with the alt= value on the
        // Powered By image.
        // FIXME: This might be fragile, but it is used across all of the
        // Blosxom blogs I have looked at so far. Brad Neuberg, bkn3@columbia.edu
        Matcher blosxomMatcher = BLOSXOM_PATTERN.matcher(content);

        return blosxomMatcher.find();
    }

    /**
     * Returns an array of FeedReferences that contains information on the
     * usual locations this blog service contains its feed. The feeds should
     * be ordered by quality, so that higher quality feeds come before lower
     * quality ones (i.e. you would want to have an Atom FeedReference
     * object come before an RSS 0.91 FeedReference object in this list).
     *
     * @param resource A URL to the given weblog that might be used to build
     * up where feeds are usually located. Not inspected by this
     * implementation.
     * @param content The full content of the resource URL, which might
     * be useful to determine where feeds are usually located. This can be
     * null. Not inspected by this implementation.
     * @return a newly created one-element array referencing "index.rss".
     * @throws FeedParserException Thrown if an error occurs while trying
     * to determine the usual locations of feeds for this service.
     */
    public FeedReference[] getFeedLocations(String resource,
                                            String content)
        throws FeedParserException {
        // there is sometimes an index.rss20 file, but Blosxom has a bug where
        // it incorrectly responds to HTTP HEAD requests for that file,
        // saying that it exists when it doesn't. Most sites don't seem
        // to have this file so we don't include it here.
        // Brad Neuberg, bkn3@columbia.edu
        FeedReference[] blosxomLocations =
            { new FeedReference("index.rss", FeedReference.RSS_MEDIA_TYPE) };

        return blosxomLocations;
    }

    /**
     * Takes a resource, such as
     * "http://www.codinginparadise.org/index.cgi?page=2#top", and returns
     * the path needed to build up a feed location, such as
     * "http://www.codinginparadise.org/index.cgi/". Basically it removes
     * any fragment ("#...") and query string ("?..."), then appends a slash
     * to the end if there is not one.
     *
     * Unlike the general description of a base feed path, this
     * implementation deliberately does NOT remove a trailing file name.
     * Some Blosxom blogs have things inside of a cgi-script and 'hang'
     * their RSS files off of this cgi-bin. For example,
     * http://www.bitbucketheaven.com/cgi-bin/blosxom.cgi has its RSS file
     * at http://www.bitbucketheaven.com/cgi-bin/blosxom.cgi/index.rss, so
     * the blosxom.cgi at the end must be returned as well.
     *
     * This method only performs string manipulation; it does not validate
     * the URL and declares no checked exceptions.
     *
     * @param resource The URL of the weblog; must not be null.
     * @return the resource without fragment or query string, always ending
     * in "/".
     */
    public String getBaseFeedPath( String resource ) {

        // The fragment starts at the FIRST '#'; everything after it
        // (including any further '#' or '?') belongs to the fragment.
        int end = resource.indexOf( '#' );

        if ( end != -1 ) {
            resource = resource.substring( 0, end );
        }

        // Likewise the query string starts at the FIRST '?'; a later '?'
        // is just data inside the query and must be dropped with it.
        end = resource.indexOf( '?' );

        if ( end != -1 ) {
            resource = resource.substring( 0, end );
        }

        if ( ! resource.endsWith( "/" ) ) {
            resource = resource + "/";
        }

        return resource;
    }
}