001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.text;
018
019import java.util.Arrays;
020
021import org.apache.commons.lang3.ArrayUtils;
022import org.apache.commons.text.matcher.StringMatcherFactory;
023
/**
 * A matcher class that can be queried to determine if a character array
 * portion matches.
 * <p>
 * This class comes complete with various factory methods.
 * If these do not suffice, you can subclass and implement your own matcher.
 * </p>
 *
 * @since 1.0
 * @deprecated Deprecated as of 1.3, use {@link StringMatcherFactory} instead. This class will be removed in 2.0.
 */
@Deprecated
public abstract class StrMatcher {

    /**
     * Class used to define a character for matching purposes.
     */
    static final class CharMatcher extends StrMatcher {
        /** The character to match. */
        private final char ch;

        /**
         * Constructor that creates a matcher that matches a single character.
         *
         * @param ch  the character to match
         */
        CharMatcher(final char ch) {
            this.ch = ch;
        }

        /**
         * Returns {@code 1} if the character at {@code pos} equals the configured character,
         * or {@code 0} otherwise.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return ch == buffer[pos] ? 1 : 0;
        }
    }

    /**
     * Class used to define a set of characters for matching purposes.
     */
    static final class CharSetMatcher extends StrMatcher {
        /** The set of characters to match, kept sorted so it can be binary-searched. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a character array.
         * <p>
         * The array is cloned before sorting, so the caller's array is never modified.
         * </p>
         *
         * @param chars  the characters to match, must not be null
         */
        CharSetMatcher(final char[] chars) {
            this.chars = chars.clone();
            Arrays.sort(this.chars);
        }

        /**
         * Returns {@code 1} if the character at {@code pos} is one of the configured characters,
         * or {@code 0} otherwise.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
        }
    }

    /**
     * Class used to match no characters.
     */
    static final class NoMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code NoMatcher}.
         */
        NoMatcher() {
        }

        /**
         * Always returns {@code 0}.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return 0;
        }
    }

    /**
     * Class used to define a string (an exact sequence of characters) for matching purposes.
     */
    static final class StringMatcher extends StrMatcher {
        /** The string to match, as a character array. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a String.
         *
         * @param str  the string to match, must not be null
         */
        StringMatcher(final String str) {
            chars = str.toCharArray();
        }

        /**
         * Returns the length of the configured string if the buffer contains it starting at
         * {@code pos} and ending before {@code bufferEnd}, or zero if there is no match.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, int pos, final int bufferStart, final int bufferEnd) {
            final int len = chars.length;
            // Compare against the remaining space rather than computing pos + len:
            // the sum can overflow to a negative value for very large positions,
            // which would skip this guard and read past the end of the buffer.
            if (len > bufferEnd - pos) {
                return 0;
            }
            for (int i = 0; i < len; i++, pos++) {
                if (chars[i] != buffer[pos]) {
                    return 0;
                }
            }
            return len;
        }

        /**
         * Returns the default object description followed by the characters being matched.
         *
         * @return a debugging string for this matcher
         */
        @Override
        public String toString() {
            return super.toString() + ' ' + Arrays.toString(chars);
        }

    }

    /**
     * Class used to match whitespace as per trim().
     */
    static final class TrimMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code TrimMatcher}.
         */
        TrimMatcher() {
        }

        /**
         * Returns {@code 1} if the character at {@code pos} has a code of 32 (space) or less,
         * or {@code 0} otherwise.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return buffer[pos] <= 32 ? 1 : 0;
        }
    }

    /**
     * Matches the comma character.
     */
    private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');

    /**
     * Matches the tab character.
     */
    private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');

    /**
     * Matches the space character.
     */
    private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');

    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and form feed.
     */
    private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());

    /**
     * Matches the String trim() whitespace characters.
     */
    private static final StrMatcher TRIM_MATCHER = new TrimMatcher();

    /**
     * Matches the single quote character.
     */
    private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');

    /**
     * Matches the double quote character.
     */
    private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');

    /**
     * Matches the single or double quote character.
     */
    private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());

    /**
     * Matches no characters.
     */
    private static final StrMatcher NONE_MATCHER = new NoMatcher();

    /**
     * Creates a matcher from a character.
     *
     * @param ch  the character to match
     * @return a new Matcher for the given char
     */
    public static StrMatcher charMatcher(final char ch) {
        return new CharMatcher(ch);
    }

    /**
     * Creates a matcher from a set of characters.
     * <p>
     * A single-element array yields a single-character matcher; the given array is not modified.
     * </p>
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a new matcher for the given char[]
     */
    public static StrMatcher charSetMatcher(final char... chars) {
        // Same null/empty handling as the String overload below.
        if (chars == null || chars.length == 0) {
            return NONE_MATCHER;
        }
        if (chars.length == 1) {
            return new CharMatcher(chars[0]);
        }
        return new CharSetMatcher(chars);
    }

    /**
     * Creates a matcher from a string representing a set of characters.
     * <p>
     * A single-character string yields a single-character matcher.
     * </p>
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a new Matcher for the given characters
     */
    public static StrMatcher charSetMatcher(final String chars) {
        if (chars == null || chars.isEmpty()) {
            return NONE_MATCHER;
        }
        if (chars.length() == 1) {
            return new CharMatcher(chars.charAt(0));
        }
        return new CharSetMatcher(chars.toCharArray());
    }

    /**
     * Returns a matcher which matches the comma character.
     *
     * @return a matcher for a comma
     */
    public static StrMatcher commaMatcher() {
        return COMMA_MATCHER;
    }

    /**
     * Returns a matcher which matches the double quote character.
     *
     * @return a matcher for a double quote
     */
    public static StrMatcher doubleQuoteMatcher() {
        return DOUBLE_QUOTE_MATCHER;
    }

    /**
     * Matches no characters.
     *
     * @return a matcher that matches nothing
     */
    public static StrMatcher noneMatcher() {
        return NONE_MATCHER;
    }

    /**
     * Returns a matcher which matches the single or double quote character.
     *
     * @return a matcher for a single or double quote
     */
    public static StrMatcher quoteMatcher() {
        return QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the single quote character.
     *
     * @return a matcher for a single quote
     */
    public static StrMatcher singleQuoteMatcher() {
        return SINGLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the space character.
     *
     * @return a matcher for a space
     */
    public static StrMatcher spaceMatcher() {
        return SPACE_MATCHER;
    }

    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and form feed.
     *
     * @return The split matcher
     */
    public static StrMatcher splitMatcher() {
        return SPLIT_MATCHER;
    }

    /**
     * Creates a matcher from a string.
     *
     * @param str  the string to match, null or empty matches nothing
     * @return a new Matcher for the given String
     */
    public static StrMatcher stringMatcher(final String str) {
        if (str == null || str.isEmpty()) {
            return NONE_MATCHER;
        }
        return new StringMatcher(str);
    }

    /**
     * Returns a matcher which matches the tab character.
     *
     * @return a matcher for a tab
     */
    public static StrMatcher tabMatcher() {
        return TAB_MATCHER;
    }

    /**
     * Matches the String trim() whitespace characters.
     *
     * @return The trim matcher
     */
    public static StrMatcher trimMatcher() {
        return TRIM_MATCHER;
    }

    /**
     * Constructs a new instance.
     */
    protected StrMatcher() {
    }

    /**
     * Returns the number of matching characters, or zero if there is no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * </p>
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those after.
     * </p>
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     * </p>
     * <p>
     * This implementation delegates to {@link #isMatch(char[], int, int, int)} with the
     * whole array as the active region.
     * </p>
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @return The number of matching characters, or zero if there is no match
     */
    public int isMatch(final char[] buffer, final int pos) {
        return isMatch(buffer, pos, 0, buffer.length);
    }

    /**
     * Returns the number of matching characters, or zero if there is no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * </p>
     * <p>
     * The character array may be larger than the active area to be matched.
     * Only values in the buffer between the specified indices may be accessed.
     * </p>
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those
     * after, so long as no checks exceed the bounds specified.
     * </p>
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     * </p>
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @param bufferStart  the first active index in the buffer, valid for buffer
     * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
     * @return The number of matching characters, or zero if there is no match
     */
    public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);

}