001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     * 
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     * 
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.lang.text;
018    
019    import java.util.Arrays;
020    
/**
 * A matcher that can be asked whether a portion of a character array
 * matches.
 * <p>
 * A number of ready-made matchers are available through the static factory
 * methods. Where those are not sufficient, subclass this class and supply
 * your own {@link #isMatch(char[], int, int, int)} implementation.
 *
 * @author Apache Software Foundation
 * @since 2.2
 * @version $Id: StrMatcher.java 905636 2010-02-02 14:03:32Z niallp $
 */
public abstract class StrMatcher {

    /** Shared matcher for the comma character. */
    private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');

    /** Shared matcher for the tab character. */
    private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');

    /** Shared matcher for the space character. */
    private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');

    /**
     * Shared matcher for the default StringTokenizer delimiters:
     * space, tab, newline, carriage return and form feed.
     */
    private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());

    /** Shared matcher for the characters that String.trim() regards as whitespace. */
    private static final StrMatcher TRIM_MATCHER = new TrimMatcher();

    /** Shared matcher for the single quote character. */
    private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');

    /** Shared matcher for the double quote character. */
    private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');

    /** Shared matcher for either the single or the double quote character. */
    private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());

    /** Shared matcher that never matches anything. */
    private static final StrMatcher NONE_MATCHER = new NoMatcher();

    // -----------------------------------------------------------------------

    /**
     * Gets the shared matcher for the comma character.
     *
     * @return a matcher for a comma
     */
    public static StrMatcher commaMatcher() {
        return COMMA_MATCHER;
    }

    /**
     * Gets the shared matcher for the tab character.
     *
     * @return a matcher for a tab
     */
    public static StrMatcher tabMatcher() {
        return TAB_MATCHER;
    }

    /**
     * Gets the shared matcher for the space character.
     *
     * @return a matcher for a space
     */
    public static StrMatcher spaceMatcher() {
        return SPACE_MATCHER;
    }

    /**
     * Gets the shared matcher for the default StringTokenizer delimiters,
     * namely space, tab, newline, carriage return and form feed.
     *
     * @return the split matcher
     */
    public static StrMatcher splitMatcher() {
        return SPLIT_MATCHER;
    }

    /**
     * Gets the shared matcher for the characters that String.trim()
     * regards as whitespace.
     *
     * @return the trim matcher
     */
    public static StrMatcher trimMatcher() {
        return TRIM_MATCHER;
    }

    /**
     * Gets the shared matcher for the single quote character.
     *
     * @return a matcher for a single quote
     */
    public static StrMatcher singleQuoteMatcher() {
        return SINGLE_QUOTE_MATCHER;
    }

    /**
     * Gets the shared matcher for the double quote character.
     *
     * @return a matcher for a double quote
     */
    public static StrMatcher doubleQuoteMatcher() {
        return DOUBLE_QUOTE_MATCHER;
    }

    /**
     * Gets the shared matcher for either the single or the double quote
     * character.
     *
     * @return a matcher for a single or double quote
     */
    public static StrMatcher quoteMatcher() {
        return QUOTE_MATCHER;
    }

    /**
     * Gets the shared matcher that never matches.
     *
     * @return a matcher that matches nothing
     */
    public static StrMatcher noneMatcher() {
        return NONE_MATCHER;
    }

    /**
     * Creates a matcher for one specific character.
     *
     * @param ch  the character to match
     * @return a new Matcher for the given char
     */
    public static StrMatcher charMatcher(char ch) {
        return new CharMatcher(ch);
    }

    /**
     * Creates a matcher that accepts any one character from the given array.
     * <p>
     * A null or empty array yields the shared "none" matcher and a
     * one-element array yields a single-character matcher.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given char[]
     */
    public static StrMatcher charSetMatcher(char[] chars) {
        final int count = (chars == null) ? 0 : chars.length;
        switch (count) {
            case 0:
                return NONE_MATCHER;
            case 1:
                return new CharMatcher(chars[0]);
            default:
                return new CharSetMatcher(chars);
        }
    }

    /**
     * Creates a matcher that accepts any one character contained in the
     * given string.
     * <p>
     * A null or empty string yields the shared "none" matcher and a
     * one-character string yields a single-character matcher.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given characters
     */
    public static StrMatcher charSetMatcher(String chars) {
        final int count = (chars == null) ? 0 : chars.length();
        switch (count) {
            case 0:
                return NONE_MATCHER;
            case 1:
                return new CharMatcher(chars.charAt(0));
            default:
                return new CharSetMatcher(chars.toCharArray());
        }
    }

    /**
     * Creates a matcher that accepts the given string as a whole.
     *
     * @param str  the string to match, null or empty matches nothing
     * @return a matcher for the given String
     */
    public static StrMatcher stringMatcher(String str) {
        final boolean nothingToMatch = (str == null || str.length() == 0);
        if (nothingToMatch) {
            return NONE_MATCHER;
        }
        return new StringMatcher(str);
    }

    //-----------------------------------------------------------------------
    /**
     * Constructor for use by subclasses.
     */
    protected StrMatcher() {
    }

    /**
     * Returns the number of matching characters, zero for no match.
     * <p>
     * This is the method invoked to test for a match. <code>pos</code> is
     * the position currently being examined in <code>buffer</code>, a
     * character array that implementations must not modify.
     * The API guarantees that <code>pos</code> is a valid index for <code>buffer</code>.
     * <p>
     * The array may be larger than the active region being matched; only
     * the elements between the specified indices may be read.
     * <p>
     * An implementation may inspect a single character or several, and may
     * look at characters preceding <code>pos</code> as well as those
     * following it, provided it stays within the bounds given.
     * <p>
     * The result must be zero when there is no match, otherwise a positive
     * count of the characters that matched.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @param bufferStart  the first active index in the buffer, valid for buffer
     * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
     * @return the number of matching characters, zero for no match
     */
    public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);

    /**
     * Returns the number of matching characters, zero for no match.
     * <p>
     * Convenience form that treats the whole of <code>buffer</code> as the
     * active region: it delegates to
     * {@link #isMatch(char[], int, int, int)} with a start of zero and an
     * end of <code>buffer.length</code>.
     * The API guarantees that <code>pos</code> is a valid index for <code>buffer</code>.
     * <p>
     * The result is zero when there is no match, otherwise a positive count
     * of the characters that matched.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @return the number of matching characters, zero for no match
     * @since 2.4
     */
    public int isMatch(char[] buffer, int pos) {
        final int end = buffer.length;
        return isMatch(buffer, pos, 0, end);
    }

    //-----------------------------------------------------------------------
    /**
     * Matcher that accepts any single character drawn from a fixed set.
     */
    static final class CharSetMatcher extends StrMatcher {
        /** The accepted characters, held in sorted order for binary search. */
        private final char[] chars;

        /**
         * Creates a matcher from an array of accepted characters.
         * The array is copied, so the caller's array is neither kept nor reordered.
         *
         * @param chars  the characters to match, must not be null
         */
        CharSetMatcher(char chars[]) {
            final char[] sorted = new char[chars.length];
            System.arraycopy(chars, 0, sorted, 0, sorted.length);
            Arrays.sort(sorted);
            this.chars = sorted;
        }

        /**
         * Tests whether the character at <code>pos</code> is one of the accepted characters.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return 1 if the character is in the set, otherwise 0
         */
        public int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd) {
            // binarySearch returns a negative value when the key is absent
            if (Arrays.binarySearch(chars, buffer[pos]) < 0) {
                return 0;
            }
            return 1;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Matcher that accepts exactly one specific character.
     */
    static final class CharMatcher extends StrMatcher {
        /** The single accepted character. */
        private final char ch;

        /**
         * Creates a matcher for a single character.
         *
         * @param ch  the character to match
         */
        CharMatcher(char ch) {
            this.ch = ch;
        }

        /**
         * Tests whether the character at <code>pos</code> equals the accepted character.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return 1 if the character matches, otherwise 0
         */
        public int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd) {
            if (buffer[pos] == ch) {
                return 1;
            }
            return 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Matcher that accepts a fixed string, compared character by character.
     */
    static final class StringMatcher extends StrMatcher {
        /** The characters of the string to match. */
        private final char[] chars;

        /**
         * Creates a matcher from a String.
         *
         * @param str  the string to match, must not be null
         */
        StringMatcher(String str) {
            this.chars = str.toCharArray();
        }

        /**
         * Tests whether the text starting at <code>pos</code> equals the stored string.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return the length of the stored string on a match, otherwise 0
         */
        public int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd) {
            final int len = chars.length;
            // the whole string must fit before the end of the active region
            if (pos + len > bufferEnd) {
                return 0;
            }
            int offset = 0;
            while (offset < len) {
                if (buffer[pos + offset] != chars[offset]) {
                    return 0;
                }
                offset++;
            }
            return len;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Matcher that never matches.
     */
    static final class NoMatcher extends StrMatcher {

        /**
         * Constructs a new instance of <code>NoMatcher</code>.
         */
        NoMatcher() {
        }

        /**
         * Always reports no match.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return always 0
         */
        public int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd) {
            return 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Matcher that accepts whitespace as defined by String.trim(),
     * that is any character whose code is 32 (space) or lower.
     */
    static final class TrimMatcher extends StrMatcher {

        /**
         * Constructs a new instance of <code>TrimMatcher</code>.
         */
        TrimMatcher() {
        }

        /**
         * Tests whether the character at <code>pos</code> has a code of 32 or lower.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return 1 if the character is trim() whitespace, otherwise 0
         */
        public int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd) {
            if (buffer[pos] > ' ') {
                return 0;
            }
            return 1;
        }
    }

}