001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 * 
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 * 
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.lang3.text;
018
019import java.util.Arrays;
020
021import org.apache.commons.lang3.StringUtils;
022
/**
 * A matcher class that can be queried to determine if a character array
 * portion matches.
 * <p>
 * This class comes complete with various factory methods.
 * If these do not suffice, you can subclass and implement your own matcher.
 * <p>
 * The simple matchers (comma, tab, space, ...) are created once and shared;
 * the {@code charMatcher}, {@code charSetMatcher} and {@code stringMatcher}
 * factories create a new matcher per call, except that null or empty input
 * yields the shared "match nothing" instance.
 *
 * @since 2.2
 * @version $Id: StrMatcher.java 1436770 2013-01-22 07:09:45Z ggregory $
 */
public abstract class StrMatcher {

    /**
     * Matches the comma character.
     */
    private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');
    /**
     * Matches the tab character.
     */
    private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');
    /**
     * Matches the space character.
     */
    private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');
    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and formfeed.
     */
    private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());
    /**
     * Matches the String trim() whitespace characters.
     */
    private static final StrMatcher TRIM_MATCHER = new TrimMatcher();
    /**
     * Matches the single quote character.
     */
    private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');
    /**
     * Matches the double quote character.
     */
    private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');
    /**
     * Matches the single or double quote character.
     */
    private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());
    /**
     * Matches no characters.
     */
    private static final StrMatcher NONE_MATCHER = new NoMatcher();

    // -----------------------------------------------------------------------

    /**
     * Returns a matcher which matches the comma character.
     *
     * @return a matcher for a comma
     */
    public static StrMatcher commaMatcher() {
        return COMMA_MATCHER;
    }

    /**
     * Returns a matcher which matches the tab character.
     *
     * @return a matcher for a tab
     */
    public static StrMatcher tabMatcher() {
        return TAB_MATCHER;
    }

    /**
     * Returns a matcher which matches the space character.
     *
     * @return a matcher for a space
     */
    public static StrMatcher spaceMatcher() {
        return SPACE_MATCHER;
    }

    /**
     * Returns a matcher which matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and formfeed.
     *
     * @return the split matcher
     */
    public static StrMatcher splitMatcher() {
        return SPLIT_MATCHER;
    }

    /**
     * Returns a matcher which matches the String trim() whitespace characters,
     * that is every character whose code is less than or equal to that of space.
     *
     * @return the trim matcher
     */
    public static StrMatcher trimMatcher() {
        return TRIM_MATCHER;
    }

    /**
     * Returns a matcher which matches the single quote character.
     *
     * @return a matcher for a single quote
     */
    public static StrMatcher singleQuoteMatcher() {
        return SINGLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the double quote character.
     *
     * @return a matcher for a double quote
     */
    public static StrMatcher doubleQuoteMatcher() {
        return DOUBLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the single or double quote character.
     *
     * @return a matcher for a single or double quote
     */
    public static StrMatcher quoteMatcher() {
        return QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches no characters.
     *
     * @return a matcher that matches nothing
     */
    public static StrMatcher noneMatcher() {
        return NONE_MATCHER;
    }

    /**
     * Factory method that creates a matcher for a single character.
     *
     * @param ch  the character to match
     * @return a new matcher for the given char
     */
    public static StrMatcher charMatcher(final char ch) {
        return new CharMatcher(ch);
    }

    /**
     * Factory method that creates a matcher from a set of characters.
     * <p>
     * A single-element array yields a single-character matcher. The array is
     * copied by the matcher, so it is neither reordered nor retained.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given char[]
     */
    public static StrMatcher charSetMatcher(final char... chars) {
        if (chars == null || chars.length == 0) {
            return NONE_MATCHER;
        }
        if (chars.length == 1) {
            return new CharMatcher(chars[0]);
        }
        return new CharSetMatcher(chars);
    }

    /**
     * Factory method that creates a matcher from a string representing a set of characters.
     * <p>
     * Each character of the string is an alternative; a one-character string
     * yields a single-character matcher.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given characters
     */
    public static StrMatcher charSetMatcher(final String chars) {
        // Same inline null/empty guard as the char[] overload above.
        if (chars == null || chars.isEmpty()) {
            return NONE_MATCHER;
        }
        if (chars.length() == 1) {
            return new CharMatcher(chars.charAt(0));
        }
        return new CharSetMatcher(chars.toCharArray());
    }

    /**
     * Factory method that creates a matcher which matches a whole string.
     *
     * @param str  the string to match, null or empty matches nothing
     * @return a matcher for the given String
     */
    public static StrMatcher stringMatcher(final String str) {
        if (str == null || str.isEmpty()) {
            return NONE_MATCHER;
        }
        return new StringMatcher(str);
    }

    //-----------------------------------------------------------------------
    /**
     * Constructor.
     */
    protected StrMatcher() {
    }

    /**
     * Returns the number of matching characters, zero for no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * <p>
     * The character array may be larger than the active area to be matched.
     * Only values in the buffer between the specified indices may be accessed.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those
     * after, so long as no checks exceed the bounds specified.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @param bufferStart  the first active index in the buffer, valid for buffer
     * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
     * @return the number of matching characters, zero for no match
     */
    public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);

    /**
     * Returns the number of matching characters, zero for no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * <p>
     * This is a convenience overload that treats the whole array as the
     * active area, i.e. it delegates with bounds {@code 0} and
     * {@code buffer.length}.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those after.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @return the number of matching characters, zero for no match
     * @since 2.4
     */
    public int isMatch(final char[] buffer, final int pos) {
        return isMatch(buffer, pos, 0, buffer.length);
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a set of characters for matching purposes.
     */
    static final class CharSetMatcher extends StrMatcher {
        /** The set of characters to match, kept sorted for binary search. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a character array.
         * <p>
         * The array is cloned before sorting, so the caller's array is left untouched.
         *
         * @param chars  the characters to match, must not be null
         */
        CharSetMatcher(final char[] chars) {
            this.chars = chars.clone();
            Arrays.sort(this.chars);
        }

        /**
         * Returns whether or not the character at {@code pos} is one of the set.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return 1 if the character is in the set, zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a character for matching purposes.
     */
    static final class CharMatcher extends StrMatcher {
        /** The character to match. */
        private final char ch;

        /**
         * Constructor that creates a matcher that matches a single character.
         *
         * @param ch  the character to match
         */
        CharMatcher(final char ch) {
            this.ch = ch;
        }

        /**
         * Returns whether or not the character at {@code pos} is the stored character.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return 1 if the character matches, zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return ch == buffer[pos] ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a string (an exact sequence of characters) for
     * matching purposes.
     */
    static final class StringMatcher extends StrMatcher {
        /** The string to match, as a character array. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a String.
         *
         * @param str  the string to match, must not be null
         */
        StringMatcher(final String str) {
            chars = str.toCharArray();
        }

        /**
         * Returns whether or not the given text matches the stored string.
         * <p>
         * The match fails if the stored string would extend past
         * {@code bufferEnd}.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return the length of the stored string on a match, zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            final int len = chars.length;
            // Compare against the remaining room rather than computing pos + len,
            // which could overflow int for very large (in-range) positions.
            if (len > bufferEnd - pos) {
                return 0;
            }
            for (int i = 0; i < len; i++) {
                if (chars[i] != buffer[pos + i]) {
                    return 0;
                }
            }
            return len;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match no characters.
     */
    static final class NoMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code NoMatcher}.
         */
        NoMatcher() {
        }

        /**
         * Always returns zero, i.e. never matches.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return always zero
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match whitespace as per trim().
     */
    static final class TrimMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code TrimMatcher}.
         */
        TrimMatcher() {
        }

        /**
         * Returns whether or not the character at {@code pos} is one that
         * String trim() strips, i.e. its code is at most that of space.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return 1 if the character matches, zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return buffer[pos] <= ' ' ? 1 : 0;
        }
    }

}