/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
017package org.apache.commons.text;
018
019import java.util.Arrays;
020
/**
 * A matcher class that can be queried to determine if a character array
 * portion matches.
 * <p>
 * This class comes complete with various factory methods.
 * If these do not suffice, you can subclass and implement your own matcher.
 * <p>
 * A matcher reports the <em>number of characters</em> that matched at a given
 * position, with zero meaning "no match".
 *
 * @since 1.0
 */
public abstract class StrMatcher {

    /**
     * Matches the comma character.
     */
    private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');
    /**
     * Matches the tab character.
     */
    private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');
    /**
     * Matches the space character.
     */
    private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');
    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and formfeed.
     */
    private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());
    /**
     * Matches the String trim() whitespace characters.
     */
    private static final StrMatcher TRIM_MATCHER = new TrimMatcher();
    /**
     * Matches the single quote character.
     */
    private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');
    /**
     * Matches the double quote character.
     */
    private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');
    /**
     * Matches the single or double quote character.
     */
    private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());
    /**
     * Matches no characters.
     */
    private static final StrMatcher NONE_MATCHER = new NoMatcher();

    // -----------------------------------------------------------------------

    /**
     * Returns a matcher which matches the comma character.
     *
     * @return a matcher for a comma
     */
    public static StrMatcher commaMatcher() {
        return COMMA_MATCHER;
    }

    /**
     * Returns a matcher which matches the tab character.
     *
     * @return a matcher for a tab
     */
    public static StrMatcher tabMatcher() {
        return TAB_MATCHER;
    }

    /**
     * Returns a matcher which matches the space character.
     *
     * @return a matcher for a space
     */
    public static StrMatcher spaceMatcher() {
        return SPACE_MATCHER;
    }

    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and formfeed.
     *
     * @return the split matcher
     */
    public static StrMatcher splitMatcher() {
        return SPLIT_MATCHER;
    }

    /**
     * Matches the String trim() whitespace characters,
     * that is every character whose code is 32 (space) or lower.
     *
     * @return the trim matcher
     */
    public static StrMatcher trimMatcher() {
        return TRIM_MATCHER;
    }

    /**
     * Returns a matcher which matches the single quote character.
     *
     * @return a matcher for a single quote
     */
    public static StrMatcher singleQuoteMatcher() {
        return SINGLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the double quote character.
     *
     * @return a matcher for a double quote
     */
    public static StrMatcher doubleQuoteMatcher() {
        return DOUBLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the single or double quote character.
     *
     * @return a matcher for a single or double quote
     */
    public static StrMatcher quoteMatcher() {
        return QUOTE_MATCHER;
    }

    /**
     * Matches no characters.
     *
     * @return a matcher that matches nothing
     */
    public static StrMatcher noneMatcher() {
        return NONE_MATCHER;
    }

    /**
     * Creates a matcher that matches a single character.
     *
     * @param ch  the character to match
     * @return a new Matcher for the given char
     */
    public static StrMatcher charMatcher(final char ch) {
        return new CharMatcher(ch);
    }

    /**
     * Creates a matcher from a set of characters.
     * <p>
     * A null or empty array yields the shared "match nothing" matcher and a
     * single-element array yields a single-character matcher.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given char[]
     */
    public static StrMatcher charSetMatcher(final char... chars) {
        if (chars == null || chars.length == 0) {
            return NONE_MATCHER;
        }
        if (chars.length == 1) {
            return new CharMatcher(chars[0]);
        }
        return new CharSetMatcher(chars);
    }

    /**
     * Creates a matcher from a string representing a set of characters.
     * <p>
     * A null or empty string yields the shared "match nothing" matcher and a
     * one-character string yields a single-character matcher.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given characters
     */
    public static StrMatcher charSetMatcher(final String chars) {
        if (chars == null || chars.length() == 0) {
            return NONE_MATCHER;
        }
        if (chars.length() == 1) {
            return new CharMatcher(chars.charAt(0));
        }
        return new CharSetMatcher(chars.toCharArray());
    }

    /**
     * Creates a matcher that matches a whole string.
     *
     * @param str  the string to match, null or empty matches nothing
     * @return a matcher for the given String
     */
    public static StrMatcher stringMatcher(final String str) {
        if (str == null || str.length() == 0) {
            return NONE_MATCHER;
        }
        return new StringMatcher(str);
    }

    //-----------------------------------------------------------------------
    /**
     * Constructor.
     */
    protected StrMatcher() {
        super();
    }

    /**
     * Returns the number of matching characters, zero for no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * <p>
     * The character array may be larger than the active area to be matched.
     * Only values in the buffer between the specified indices may be accessed.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those
     * after, so long as no checks exceed the bounds specified.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @param bufferStart  the first active index in the buffer, valid for buffer
     * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
     * @return the number of matching characters, zero for no match
     */
    public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);

    /**
     * Returns the number of matching characters, zero for no match.
     * <p>
     * This is a convenience overload that treats the whole array as the
     * active area, i.e. it delegates to
     * {@link #isMatch(char[], int, int, int)} with a start of {@code 0} and
     * an end of {@code buffer.length}.
     * <p>
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those after.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @return the number of matching characters, zero for no match
     */
    public int isMatch(final char[] buffer, final int pos) {
        return isMatch(buffer, pos, 0, buffer.length);
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a set of characters for matching purposes.
     */
    static final class CharSetMatcher extends StrMatcher {
        /** The set of characters to match, kept sorted for binary search. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a character array.
         * <p>
         * The array is copied, so the caller's array is neither retained nor reordered.
         *
         * @param chars  the characters to match, must not be null
         */
        CharSetMatcher(final char[] chars) {
            super();
            this.chars = chars.clone();
            // binarySearch in isMatch requires a sorted array
            Arrays.sort(this.chars);
        }

        /**
         * Returns whether or not the character at {@code pos} is in the set.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return 1 if the character is in the set, zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a character for matching purposes.
     */
    static final class CharMatcher extends StrMatcher {
        /** The character to match. */
        private final char ch;

        /**
         * Constructor that creates a matcher that matches a single character.
         *
         * @param ch  the character to match
         */
        CharMatcher(final char ch) {
            super();
            this.ch = ch;
        }

        /**
         * Returns whether or not the character at {@code pos} equals the stored character.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return 1 if the character matches, zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return ch == buffer[pos] ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match a complete string (a fixed sequence of characters).
     */
    static final class StringMatcher extends StrMatcher {
        /** The string to match, as a character array. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a String.
         *
         * @param str  the string to match, must not be null
         */
        StringMatcher(final String str) {
            super();
            chars = str.toCharArray();
        }

        /**
         * Returns whether or not the text starting at {@code pos} matches the stored string.
         * <p>
         * No match is reported when the stored string would extend past {@code bufferEnd}.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return the length of the stored string on a match, zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            final int len = chars.length;
            // Widen to long so that pos + len cannot wrap around for very large values.
            if ((long) pos + len > bufferEnd) {
                return 0;
            }
            for (int i = 0; i < len; i++) {
                if (chars[i] != buffer[pos + i]) {
                    return 0;
                }
            }
            return len;
        }

        /**
         * Returns the default object description followed by the characters being matched.
         *
         * @return a string representation of this matcher
         */
        @Override
        public String toString() {
            return super.toString() + ' ' + Arrays.toString(chars);
        }

    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match no characters.
     */
    static final class NoMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code NoMatcher}.
         */
        NoMatcher() {
            super();
        }

        /**
         * Always returns zero, i.e. never matches.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return always zero
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match whitespace as per trim().
     */
    static final class TrimMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code TrimMatcher}.
         */
        TrimMatcher() {
            super();
        }

        /**
         * Returns whether or not the character at {@code pos} has a code of 32 (space) or lower.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return 1 if the character matches, zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return buffer[pos] <= 32 ? 1 : 0;
        }
    }

}