001    /*
002     * Copyright 1999,2004 The Apache Software Foundation.
003     * 
004     * Licensed under the Apache License, Version 2.0 (the "License");
005     * you may not use this file except in compliance with the License.
006     * You may obtain a copy of the License at
007     * 
008     *      http://www.apache.org/licenses/LICENSE-2.0
009     * 
010     * Unless required by applicable law or agreed to in writing, software
011     * distributed under the License is distributed on an "AS IS" BASIS,
012     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013     * See the License for the specific language governing permissions and
014     * limitations under the License.
015     */
016    
017    package org.apache.commons.feedparser.locate.blogservice;
018    
019    import java.net.*;
020    import java.util.regex.Matcher;
021    import java.util.regex.Pattern;
022    
023    import org.apache.commons.feedparser.FeedParserException;
024    import org.apache.commons.feedparser.locate.*;
025    
026    /**
027     * Models the Xanga blog service, encapsulating whether a given weblog
028     * is this type of service and where it usually keeps its feeds.
029     * 
030     * @author Brad Neuberg, bkn3@columbia.edu
031     */
032    public class Xanga extends BlogService {
033        
034        /**
035         * A regex to extract the user from a Xanga URL
036         */
037        private static Pattern xangaURLPattern = Pattern.compile(".*user=(\\w*)");
038            
039        /** Returns whether we can trust the results of this blog service's 
040         *  autodiscovery links.  For example, TextAmerica returns invalid 
041         *  autodiscovery results.
042         */
043        public boolean hasValidAutoDiscovery() {
044            return true;
045        }
046        
047        /** Returns whether we should follow HTTP redirects for this blog service.
048         *  Some services don't implement HTTP redirects correctly, while others,
049         *  like Xanga, require it.
050         */
051        public boolean followRedirects() {
052            return true;
053        }
054        
055        /** Determines if the weblog at the given resource and with the given
056         *  content is this blog service.
057         * @param resource A full URI to this resource, such as 
058         * "http://www.codinginparadise.org".
059         * @param content The full HTML content at the resource's URL.
060         * @throws FeedParserException Thrown if an error occurs while 
061         * determining the type of this weblog.
062         */
063        public boolean isThisService(String resource, String content)
064                                                    throws FeedParserException {
065            boolean results = false;
066            
067            results = containsDomain(resource, "xanga.com");
068            
069            return results;
070        }
071    
072        /**
073         * Returns an array of FeedReferences that contains information on the
074         * usual locations this blog service contains its feed.  The feeds should
075         * be ordered by quality, so that higher quality feeds come before lower
076         * quality ones (i.e. you would want to have an Atom FeedReference
077         * object come before an RSS 0.91 FeedReference object in this list).
078         * @param resource A URL to the given weblog that might be used to build
079         * up where feeds are usually located.
080         * @param content The full content of the resource URL, which might
081         * be useful to determine where feeds are usually located.  This can be
082         * null.
083         * @throws FeedParserException Thrown if an error occurs while trying
084         * to determine the usual locations of feeds for this service.
085         */
086        public FeedReference[] getFeedLocations(String resource,
087                                                String content)
088                                                    throws FeedParserException {
089            // Xanga feeds have to be handled specially since they put their
090            // feeds at the location: http://www.xanga.com/rss.aspx?user=username
091            String user = getXangaUser(resource);
092            FeedReference xangaLocations[] =
093                { new FeedReference("rss.aspx?user=" + user, 
094                                    FeedReference.RSS_MEDIA_TYPE) };
095            
096            return xangaLocations;
097        }
098        
099        /** Xanga's feed locations are dependent on the 'user' attribute in a
100         *  Xanga URI.  This method helps extract the user element from an 
101         *  existing URI, such as http://www.xanga.com/home.aspx?user=wdfphillz.
102         */
103        protected String getXangaUser(String resource) {
104            Matcher xangaMatcher = xangaURLPattern.matcher(resource);
105            xangaMatcher.matches();
106            
107            return xangaMatcher.group(1);
108        }
109    }