001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.text;
018
019import java.util.Arrays;
020
021import org.apache.commons.lang3.ArrayUtils;
022import org.apache.commons.text.matcher.StringMatcherFactory;
023
/**
 * A matcher class that can be queried to determine if a character array
 * portion matches.
 * <p>
 * This class comes complete with various factory methods.
 * If these do not suffice, you can subclass and implement your own matcher.
 * </p>
 * <p>
 * The matchers returned by the factory methods hold no mutable state:
 * each one only reads the buffer handed to {@code isMatch}.
 * </p>
 *
 * @since 1.0
 * @deprecated Deprecated as of 1.3, use {@link StringMatcherFactory} instead. This class will be removed in 2.0.
 */
@Deprecated
public abstract class StrMatcher {

    /**
     * Matches the comma character.
     */
    private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');

    /**
     * Matches the tab character.
     */
    private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');

    /**
     * Matches the space character.
     */
    private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');

    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and form feed.
     */
    private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());

    /**
     * Matches the String trim() whitespace characters,
     * that is every character whose value is less than or equal to 32 (space).
     */
    private static final StrMatcher TRIM_MATCHER = new TrimMatcher();

    /**
     * Matches the single quote character.
     */
    private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');

    /**
     * Matches the double quote character.
     */
    private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');

    /**
     * Matches the single or double quote character.
     */
    private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());

    /**
     * Matches no characters.
     */
    private static final StrMatcher NONE_MATCHER = new NoMatcher();

    // -----------------------------------------------------------------------

    /**
     * Returns a matcher which matches the comma character.
     *
     * @return a matcher for a comma
     */
    public static StrMatcher commaMatcher() {
        return COMMA_MATCHER;
    }

    /**
     * Returns a matcher which matches the tab character.
     *
     * @return a matcher for a tab
     */
    public static StrMatcher tabMatcher() {
        return TAB_MATCHER;
    }

    /**
     * Returns a matcher which matches the space character.
     *
     * @return a matcher for a space
     */
    public static StrMatcher spaceMatcher() {
        return SPACE_MATCHER;
    }

    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and form feed.
     *
     * @return The split matcher
     */
    public static StrMatcher splitMatcher() {
        return SPLIT_MATCHER;
    }

    /**
     * Matches the String trim() whitespace characters,
     * that is every character whose value is less than or equal to 32 (space).
     *
     * @return The trim matcher
     */
    public static StrMatcher trimMatcher() {
        return TRIM_MATCHER;
    }

    /**
     * Returns a matcher which matches the single quote character.
     *
     * @return a matcher for a single quote
     */
    public static StrMatcher singleQuoteMatcher() {
        return SINGLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the double quote character.
     *
     * @return a matcher for a double quote
     */
    public static StrMatcher doubleQuoteMatcher() {
        return DOUBLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the single or double quote character.
     *
     * @return a matcher for a single or double quote
     */
    public static StrMatcher quoteMatcher() {
        return QUOTE_MATCHER;
    }

    /**
     * Matches no characters.
     *
     * @return a matcher that matches nothing
     */
    public static StrMatcher noneMatcher() {
        return NONE_MATCHER;
    }

    /**
     * Creates a matcher from a character.
     *
     * @param ch  the character to match
     * @return a new Matcher for the given char
     */
    public static StrMatcher charMatcher(final char ch) {
        return new CharMatcher(ch);
    }

    /**
     * Creates a matcher from a set of characters.
     * <p>
     * A {@code null} or empty array yields the shared "match nothing" matcher,
     * and a single-element array yields a single-character matcher.
     * </p>
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given char[]
     */
    public static StrMatcher charSetMatcher(final char... chars) {
        // Same null/empty guard as the String overload.
        if (chars == null || chars.length == 0) {
            return NONE_MATCHER;
        }
        if (chars.length == 1) {
            return new CharMatcher(chars[0]);
        }
        return new CharSetMatcher(chars);
    }

    /**
     * Creates a matcher from a string representing a set of characters.
     * <p>
     * A {@code null} or empty string yields the shared "match nothing" matcher,
     * and a one-character string yields a single-character matcher.
     * </p>
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given characters
     */
    public static StrMatcher charSetMatcher(final String chars) {
        if (chars == null || chars.length() == 0) {
            return NONE_MATCHER;
        }
        if (chars.length() == 1) {
            return new CharMatcher(chars.charAt(0));
        }
        return new CharSetMatcher(chars.toCharArray());
    }

    /**
     * Creates a matcher from a string.
     * <p>
     * A {@code null} or empty string yields the shared "match nothing" matcher.
     * </p>
     *
     * @param str  the string to match, null or empty matches nothing
     * @return a matcher for the given String
     */
    public static StrMatcher stringMatcher(final String str) {
        if (str == null || str.length() == 0) {
            return NONE_MATCHER;
        }
        return new StringMatcher(str);
    }

    //-----------------------------------------------------------------------
    /**
     * Constructor.
     */
    protected StrMatcher() {
        super();
    }

    /**
     * Returns the number of matching characters, or zero if there is no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * <p>
     * The character array may be larger than the active area to be matched.
     * Only values in the buffer between the specified indices may be accessed.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those
     * after, so long as no checks exceed the bounds specified.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @param bufferStart  the first active index in the buffer, valid for buffer
     * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
     * @return The number of matching characters, or zero if there is no match
     */
    public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);

    /**
     * Returns the number of matching characters, or zero if there is no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those after.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     * <p>
     * This is a convenience for {@code isMatch(buffer, pos, 0, buffer.length)},
     * i.e. the whole array is treated as the active area.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @return The number of matching characters, or zero if there is no match
     */
    public int isMatch(final char[] buffer, final int pos) {
        return isMatch(buffer, pos, 0, buffer.length);
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a set of characters for matching purposes.
     */
    static final class CharSetMatcher extends StrMatcher {
        /** The set of characters to match, kept sorted so it can be binary-searched. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a character array.
         * <p>
         * The array is copied before being sorted, so the caller's array is left untouched.
         * </p>
         *
         * @param chars  the characters to match, must not be null
         */
        CharSetMatcher(final char[] chars) {
            super();
            this.chars = chars.clone();
            Arrays.sort(this.chars);
        }

        /**
         * Returns {@code 1} if there is a match, or {@code 0} if there is no match.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a character for matching purposes.
     */
    static final class CharMatcher extends StrMatcher {
        /** The character to match. */
        private final char ch;

        /**
         * Constructor that creates a matcher that matches a single character.
         *
         * @param ch  the character to match
         */
        CharMatcher(final char ch) {
            super();
            this.ch = ch;
        }

        /**
         * Returns {@code 1} if there is a match, or {@code 0} if there is no match.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return ch == buffer[pos] ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a string (an exact sequence of characters) for matching purposes.
     */
    static final class StringMatcher extends StrMatcher {
        /** The string to match, as a character array. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a String.
         *
         * @param str  the string to match, must not be null
         */
        StringMatcher(final String str) {
            super();
            chars = str.toCharArray();
        }

        /**
         * Returns the number of matching characters, or zero if there is no match.
         * <p>
         * A match requires the whole string to be present starting at {@code pos}
         * and to end before {@code bufferEnd}.
         * </p>
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            final int len = chars.length;
            // Compare against the remaining room rather than computing pos + len:
            // pos is a valid (non-negative) index, so the subtraction cannot overflow,
            // whereas the addition could wrap around for very large positions.
            if (len > bufferEnd - pos) {
                return 0;
            }
            for (int i = 0; i < len; i++) {
                if (chars[i] != buffer[pos + i]) {
                    return 0;
                }
            }
            return len;
        }

        /**
         * Returns the default object description followed by the characters being matched.
         *
         * @return a description of this matcher
         */
        @Override
        public String toString() {
            return super.toString() + ' ' + Arrays.toString(chars);
        }

    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match no characters.
     */
    static final class NoMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code NoMatcher}.
         */
        NoMatcher() {
            super();
        }

        /**
         * Always returns {@code 0}.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match whitespace as per trim().
     */
    static final class TrimMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code TrimMatcher}.
         */
        TrimMatcher() {
            super();
        }

        /**
         * Returns {@code 1} if the character at {@code pos} has a value less than
         * or equal to 32 (space), or {@code 0} otherwise.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return buffer[pos] <= 32 ? 1 : 0;
        }
    }

}