001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     * 
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     * 
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.lang3.text;
018    
019    import java.util.Arrays;
020    
021    import org.apache.commons.lang3.StringUtils;
022    
/**
 * A matcher class that can be queried to determine if a character array
 * portion matches.
 * <p>
 * This class comes complete with various factory methods.
 * If these do not suffice, you can subclass and implement your own matcher.
 * <p>
 * The no-argument factory methods (for example {@link #commaMatcher()})
 * return shared constant instances; the factory methods that take an
 * argument create a new matcher, except that a null or empty argument
 * yields the shared {@link #noneMatcher()} instance.
 *
 * @since 2.2
 * @version $Id: StrMatcher.java 1144925 2011-07-10 18:07:05Z ggregory $
 */
public abstract class StrMatcher {

    /**
     * Matches the comma character.
     */
    private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');
    /**
     * Matches the tab character.
     */
    private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');
    /**
     * Matches the space character.
     */
    private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');
    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and formfeed.
     */
    private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());
    /**
     * Matches the String trim() whitespace characters.
     */
    private static final StrMatcher TRIM_MATCHER = new TrimMatcher();
    /**
     * Matches the single quote character.
     */
    private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');
    /**
     * Matches the double quote character.
     */
    private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');
    /**
     * Matches the single or double quote character.
     */
    private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());
    /**
     * Matches no characters.
     */
    private static final StrMatcher NONE_MATCHER = new NoMatcher();

    // -----------------------------------------------------------------------

    /**
     * Returns a matcher which matches the comma character.
     *
     * @return a matcher for a comma
     */
    public static StrMatcher commaMatcher() {
        return COMMA_MATCHER;
    }

    /**
     * Returns a matcher which matches the tab character.
     *
     * @return a matcher for a tab
     */
    public static StrMatcher tabMatcher() {
        return TAB_MATCHER;
    }

    /**
     * Returns a matcher which matches the space character.
     *
     * @return a matcher for a space
     */
    public static StrMatcher spaceMatcher() {
        return SPACE_MATCHER;
    }

    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and formfeed.
     *
     * @return the split matcher
     */
    public static StrMatcher splitMatcher() {
        return SPLIT_MATCHER;
    }

    /**
     * Matches the String trim() whitespace characters,
     * that is any character whose code is less than or equal to that of space.
     *
     * @return the trim matcher
     */
    public static StrMatcher trimMatcher() {
        return TRIM_MATCHER;
    }

    /**
     * Returns a matcher which matches the single quote character.
     *
     * @return a matcher for a single quote
     */
    public static StrMatcher singleQuoteMatcher() {
        return SINGLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the double quote character.
     *
     * @return a matcher for a double quote
     */
    public static StrMatcher doubleQuoteMatcher() {
        return DOUBLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the single or double quote character.
     *
     * @return a matcher for a single or double quote
     */
    public static StrMatcher quoteMatcher() {
        return QUOTE_MATCHER;
    }

    /**
     * Matches no characters.
     *
     * @return a matcher that matches nothing
     */
    public static StrMatcher noneMatcher() {
        return NONE_MATCHER;
    }

    /**
     * Constructor that creates a matcher from a character.
     *
     * @param ch  the character to match
     * @return a new Matcher for the given char
     */
    public static StrMatcher charMatcher(char ch) {
        return new CharMatcher(ch);
    }

    /**
     * Constructor that creates a matcher from a set of characters.
     * <p>
     * A single-element array produces a single-character matcher.
     * The array is copied, so later changes to it do not affect the matcher.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a new matcher for the given char[]
     */
    public static StrMatcher charSetMatcher(char... chars) {
        if (chars == null || chars.length == 0) {
            return NONE_MATCHER;
        }
        if (chars.length == 1) {
            return new CharMatcher(chars[0]);
        }
        return new CharSetMatcher(chars);
    }

    /**
     * Constructor that creates a matcher from a string representing a set of characters.
     * <p>
     * Each character of the string is matched individually; use
     * {@link #stringMatcher(String)} to match the string as a whole.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a new Matcher for the given characters
     */
    public static StrMatcher charSetMatcher(String chars) {
        if (chars == null || chars.length() == 0) {
            return NONE_MATCHER;
        }
        if (chars.length() == 1) {
            return new CharMatcher(chars.charAt(0));
        }
        return new CharSetMatcher(chars.toCharArray());
    }

    /**
     * Constructor that creates a matcher from a string.
     * <p>
     * The returned matcher matches the whole string, character for character,
     * starting at the position being checked.
     *
     * @param str  the string to match, null or empty matches nothing
     * @return a new Matcher for the given String
     */
    public static StrMatcher stringMatcher(String str) {
        // Same null/empty test as charSetMatcher(String), kept inline for consistency.
        if (str == null || str.length() == 0) {
            return NONE_MATCHER;
        }
        return new StringMatcher(str);
    }

    //-----------------------------------------------------------------------
    /**
     * Constructor.
     */
    protected StrMatcher() {
        super();
    }

    /**
     * Returns the number of matching characters, zero for no match.
     * <p>
     * This method is called to check for a match.
     * The parameter <code>pos</code> represents the current position to be
     * checked in the string <code>buffer</code> (a character array which must
     * not be changed).
     * The API guarantees that <code>pos</code> is a valid index for <code>buffer</code>.
     * <p>
     * The character array may be larger than the active area to be matched.
     * Only values in the buffer between the specified indices may be accessed.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding <code>pos</code> as well as those
     * after, so long as no checks exceed the bounds specified.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @param bufferStart  the first active index in the buffer, valid for buffer
     * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
     * @return the number of matching characters, zero for no match
     */
    public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);

    /**
     * Returns the number of matching characters, zero for no match.
     * <p>
     * This method is called to check for a match.
     * The parameter <code>pos</code> represents the current position to be
     * checked in the string <code>buffer</code> (a character array which must
     * not be changed).
     * The API guarantees that <code>pos</code> is a valid index for <code>buffer</code>.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding <code>pos</code> as well as those after.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     * <p>
     * This implementation delegates to
     * {@link #isMatch(char[], int, int, int)} with the whole of
     * <code>buffer</code> as the active area.
     *
     * @param buffer  the text content to match against, do not change, must not be null
     * @param pos  the starting position for the match, valid for buffer
     * @return the number of matching characters, zero for no match
     * @since 2.4
     */
    public int isMatch(char[] buffer, int pos) {
        return isMatch(buffer, pos, 0, buffer.length);
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a set of characters for matching purposes.
     */
    static final class CharSetMatcher extends StrMatcher {
        /** The set of characters to match, sorted so that it can be binary searched. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a character array.
         * <p>
         * The array is cloned before being sorted, so the caller's array is
         * neither retained nor reordered.
         *
         * @param chars  the characters to match, must not be null
         */
        CharSetMatcher(char[] chars) {
            super();
            this.chars = chars.clone();
            Arrays.sort(this.chars);
        }

        /**
         * Returns whether or not the given character matches.
         * <p>
         * Only the character at <code>pos</code> is examined;
         * <code>bufferStart</code> and <code>bufferEnd</code> are not consulted.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return 1 if the character at <code>pos</code> is in the set, zero for no match
         */
        @Override
        public int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd) {
            return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a character for matching purposes.
     */
    static final class CharMatcher extends StrMatcher {
        /** The character to match. */
        private final char ch;

        /**
         * Constructor that creates a matcher that matches a single character.
         *
         * @param ch  the character to match
         */
        CharMatcher(char ch) {
            super();
            this.ch = ch;
        }

        /**
         * Returns whether or not the given character matches.
         * <p>
         * Only the character at <code>pos</code> is examined;
         * <code>bufferStart</code> and <code>bufferEnd</code> are not consulted.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return 1 if the character at <code>pos</code> equals the stored character,
         *  zero for no match
         */
        @Override
        public int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd) {
            return ch == buffer[pos] ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a string (a fixed sequence of characters)
     * for matching purposes.
     */
    static final class StringMatcher extends StrMatcher {
        /** The string to match, as a character array. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a String.
         *
         * @param str  the string to match, must not be null
         */
        StringMatcher(String str) {
            super();
            chars = str.toCharArray();
        }

        /**
         * Returns whether or not the given text matches the stored string.
         * <p>
         * The stored string must fit entirely between <code>pos</code> and
         * <code>bufferEnd</code>; <code>bufferStart</code> is not consulted.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return the length of the stored string if it occurs at <code>pos</code>,
         *  zero for no match
         */
        @Override
        public int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd) {
            final int len = chars.length;
            // Compare against the room left in the active area instead of
            // computing pos + len, so the bounds check cannot overflow int
            // (pos and bufferEnd are documented as valid, non-negative positions).
            if (len > bufferEnd - pos) {
                return 0;
            }
            for (int i = 0; i < len; i++) {
                if (chars[i] != buffer[pos + i]) {
                    return 0;
                }
            }
            return len;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match no characters.
     */
    static final class NoMatcher extends StrMatcher {

        /**
         * Constructs a new instance of <code>NoMatcher</code>.
         */
        NoMatcher() {
            super();
        }

        /**
         * Always returns zero, meaning no match; none of the arguments are examined.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return zero, always
         */
        @Override
        public int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd) {
            return 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match whitespace as per trim().
     */
    static final class TrimMatcher extends StrMatcher {

        /**
         * Constructs a new instance of <code>TrimMatcher</code>.
         */
        TrimMatcher() {
            super();
        }

        /**
         * Returns whether or not the given character matches.
         * <p>
         * A character matches when its code is less than or equal to that of
         * the space character. Only the character at <code>pos</code> is
         * examined; <code>bufferStart</code> and <code>bufferEnd</code> are
         * not consulted.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return 1 if the character at <code>pos</code> is trim() whitespace,
         *  zero for no match
         */
        @Override
        public int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd) {
            // ' ' is code 32, the upper bound of the characters String.trim() strips.
            return buffer[pos] <= ' ' ? 1 : 0;
        }
    }

}