001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.lang3.text;
018
019import java.util.Arrays;
020
021import org.apache.commons.lang3.ArraySorter;
022import org.apache.commons.lang3.ArrayUtils;
023import org.apache.commons.lang3.StringUtils;
024
025/**
026 * A matcher class that can be queried to determine if a character array
027 * portion matches.
028 * <p>
029 * This class comes complete with various factory methods.
030 * If these do not suffice, you can subclass and implement your own matcher.
031 * </p>
032 *
033 * @since 2.2
034 * @deprecated As of 3.6, use Apache Commons Text
035 * <a href="https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/matcher/StringMatcherFactory.html">
036 * StringMatcherFactory</a> instead
037 */
038@Deprecated
039public abstract class StrMatcher {
040
041    /**
042     * Class used to define a character for matching purposes.
043     */
044    static final class CharMatcher extends StrMatcher {
045        /** The character to match. */
046        private final char ch;
047
048        /**
049         * Constructor that creates a matcher that matches a single character.
050         *
051         * @param ch  the character to match
052         */
053        CharMatcher(final char ch) {
054            this.ch = ch;
055        }
056
057        /**
058         * Returns whether or not the given character matches.
059         *
060         * @param buffer  the text content to match against, do not change
061         * @param pos  the starting position for the match, valid for buffer
062         * @param bufferStart  the first active index in the buffer, valid for buffer
063         * @param bufferEnd  the end index of the active buffer, valid for buffer
064         * @return the number of matching characters, zero for no match
065         */
066        @Override
067        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
068            return ch == buffer[pos] ? 1 : 0;
069        }
070    }
071    /**
072     * Class used to define a set of characters for matching purposes.
073     */
074    static final class CharSetMatcher extends StrMatcher {
075        /** The set of characters to match. */
076        private final char[] chars;
077
078        /**
079         * Constructor that creates a matcher from a character array.
080         *
081         * @param chars  the characters to match, must not be null
082         */
083        CharSetMatcher(final char[] chars) {
084            this.chars = ArraySorter.sort(chars.clone());
085        }
086
087        /**
088         * Returns whether or not the given character matches.
089         *
090         * @param buffer  the text content to match against, do not change
091         * @param pos  the starting position for the match, valid for buffer
092         * @param bufferStart  the first active index in the buffer, valid for buffer
093         * @param bufferEnd  the end index of the active buffer, valid for buffer
094         * @return the number of matching characters, zero for no match
095         */
096        @Override
097        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
098            return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
099        }
100    }
101    /**
102     * Class used to match no characters.
103     */
104    static final class NoMatcher extends StrMatcher {
105
106        /**
107         * Constructs a new instance of {@link NoMatcher}.
108         */
109        NoMatcher() {
110        }
111
112        /**
113         * Always returns {@code false}.
114         *
115         * @param buffer  the text content to match against, do not change
116         * @param pos  the starting position for the match, valid for buffer
117         * @param bufferStart  the first active index in the buffer, valid for buffer
118         * @param bufferEnd  the end index of the active buffer, valid for buffer
119         * @return the number of matching characters, zero for no match
120         */
121        @Override
122        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
123            return 0;
124        }
125    }
126    /**
127     * Class used to define a set of characters for matching purposes.
128     */
129    static final class StringMatcher extends StrMatcher {
130        /** The string to match, as a character array. */
131        private final char[] chars;
132
133        /**
134         * Constructor that creates a matcher from a String.
135         *
136         * @param str  the string to match, must not be null
137         */
138        StringMatcher(final String str) {
139            chars = str.toCharArray();
140        }
141
142        /**
143         * Returns whether or not the given text matches the stored string.
144         *
145         * @param buffer  the text content to match against, do not change
146         * @param pos  the starting position for the match, valid for buffer
147         * @param bufferStart  the first active index in the buffer, valid for buffer
148         * @param bufferEnd  the end index of the active buffer, valid for buffer
149         * @return the number of matching characters, zero for no match
150         */
151        @Override
152        public int isMatch(final char[] buffer, int pos, final int bufferStart, final int bufferEnd) {
153            final int len = chars.length;
154            if (pos + len > bufferEnd) {
155                return 0;
156            }
157            for (int i = 0; i < chars.length; i++, pos++) {
158                if (chars[i] != buffer[pos]) {
159                    return 0;
160                }
161            }
162            return len;
163        }
164
165        @Override
166        public String toString() {
167            return super.toString() + ' ' + Arrays.toString(chars);
168        }
169
170    }
171    /**
172     * Class used to match whitespace as per trim().
173     */
174    static final class TrimMatcher extends StrMatcher {
175
176        /**
177         * Constructs a new instance of {@link TrimMatcher}.
178         */
179        TrimMatcher() {
180        }
181
182        /**
183         * Returns whether or not the given character matches.
184         *
185         * @param buffer  the text content to match against, do not change
186         * @param pos  the starting position for the match, valid for buffer
187         * @param bufferStart  the first active index in the buffer, valid for buffer
188         * @param bufferEnd  the end index of the active buffer, valid for buffer
189         * @return the number of matching characters, zero for no match
190         */
191        @Override
192        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
193            return buffer[pos] <= 32 ? 1 : 0;
194        }
195    }
196    /**
197     * Matches the comma character.
198     */
199    private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');
200    /**
201     * Matches the tab character.
202     */
203    private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');
204    /**
205     * Matches the space character.
206     */
207    private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');
208    /**
209     * Matches the same characters as StringTokenizer,
210     * namely space, tab, newline, formfeed.
211     */
212    private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());
213
214    /**
215     * Matches the String trim() whitespace characters.
216     */
217    private static final StrMatcher TRIM_MATCHER = new TrimMatcher();
218
219    /**
220     * Matches the double quote character.
221     */
222    private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');
223
224    /**
225     * Matches the double quote character.
226     */
227    private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');
228
229    /**
230     * Matches the single or double quote character.
231     */
232    private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());
233
234    /**
235     * Matches no characters.
236     */
237    private static final StrMatcher NONE_MATCHER = new NoMatcher();
238
239    /**
240     * Constructor that creates a matcher from a character.
241     *
242     * @param ch  the character to match, must not be null
243     * @return a new Matcher for the given char
244     */
245    public static StrMatcher charMatcher(final char ch) {
246        return new CharMatcher(ch);
247    }
248
249    /**
250     * Constructor that creates a matcher from a set of characters.
251     *
252     * @param chars  the characters to match, null or empty matches nothing
253     * @return a new matcher for the given char[]
254     */
255    public static StrMatcher charSetMatcher(final char... chars) {
256        if (ArrayUtils.isEmpty(chars)) {
257            return NONE_MATCHER;
258        }
259        if (chars.length == 1) {
260            return new CharMatcher(chars[0]);
261        }
262        return new CharSetMatcher(chars);
263    }
264
265    /**
266     * Constructor that creates a matcher from a string representing a set of characters.
267     *
268     * @param chars  the characters to match, null or empty matches nothing
269     * @return a new Matcher for the given characters
270     */
271    public static StrMatcher charSetMatcher(final String chars) {
272        if (StringUtils.isEmpty(chars)) {
273            return NONE_MATCHER;
274        }
275        if (chars.length() == 1) {
276            return new CharMatcher(chars.charAt(0));
277        }
278        return new CharSetMatcher(chars.toCharArray());
279    }
280
281    /**
282     * Returns a matcher which matches the comma character.
283     *
284     * @return a matcher for a comma
285     */
286    public static StrMatcher commaMatcher() {
287        return COMMA_MATCHER;
288    }
289
290    /**
291     * Returns a matcher which matches the double quote character.
292     *
293     * @return a matcher for a double quote
294     */
295    public static StrMatcher doubleQuoteMatcher() {
296        return DOUBLE_QUOTE_MATCHER;
297    }
298
299    /**
300     * Matches no characters.
301     *
302     * @return a matcher that matches nothing
303     */
304    public static StrMatcher noneMatcher() {
305        return NONE_MATCHER;
306    }
307
308    /**
309     * Returns a matcher which matches the single or double quote character.
310     *
311     * @return a matcher for a single or double quote
312     */
313    public static StrMatcher quoteMatcher() {
314        return QUOTE_MATCHER;
315    }
316
317    /**
318     * Returns a matcher which matches the single quote character.
319     *
320     * @return a matcher for a single quote
321     */
322    public static StrMatcher singleQuoteMatcher() {
323        return SINGLE_QUOTE_MATCHER;
324    }
325
326    /**
327     * Returns a matcher which matches the space character.
328     *
329     * @return a matcher for a space
330     */
331    public static StrMatcher spaceMatcher() {
332        return SPACE_MATCHER;
333    }
334
335    /**
336     * Matches the same characters as StringTokenizer,
337     * namely space, tab, newline and formfeed.
338     *
339     * @return the split matcher
340     */
341    public static StrMatcher splitMatcher() {
342        return SPLIT_MATCHER;
343    }
344
345    /**
346     * Constructor that creates a matcher from a string.
347     *
348     * @param str  the string to match, null or empty matches nothing
349     * @return a new Matcher for the given String
350     */
351    public static StrMatcher stringMatcher(final String str) {
352        if (StringUtils.isEmpty(str)) {
353            return NONE_MATCHER;
354        }
355        return new StringMatcher(str);
356    }
357
358    /**
359     * Returns a matcher which matches the tab character.
360     *
361     * @return a matcher for a tab
362     */
363    public static StrMatcher tabMatcher() {
364        return TAB_MATCHER;
365    }
366
367    /**
368     * Matches the String trim() whitespace characters.
369     *
370     * @return the trim matcher
371     */
372    public static StrMatcher trimMatcher() {
373        return TRIM_MATCHER;
374    }
375
376    /**
377     * Constructs a new instance.
378     */
379    protected StrMatcher() {
380    }
381
382    /**
383     * Returns the number of matching characters, zero for no match.
384     * <p>
385     * This method is called to check for a match.
386     * The parameter {@code pos} represents the current position to be
387     * checked in the string {@code buffer} (a character array which must
388     * not be changed).
389     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
390     * </p>
391     * <p>
392     * The matching code may check one character or many.
393     * It may check characters preceding {@code pos} as well as those after.
394     * </p>
395     * <p>
396     * It must return zero for no match, or a positive number if a match was found.
397     * The number indicates the number of characters that matched.
398     * </p>
399     *
400     * @param buffer  the text content to match against, do not change
401     * @param pos  the starting position for the match, valid for buffer
402     * @return the number of matching characters, zero for no match
403     * @since 2.4
404     */
405    public int isMatch(final char[] buffer, final int pos) {
406        return isMatch(buffer, pos, 0, buffer.length);
407    }
408
409    /**
410     * Returns the number of matching characters, zero for no match.
411     * <p>
412     * This method is called to check for a match.
413     * The parameter {@code pos} represents the current position to be
414     * checked in the string {@code buffer} (a character array which must
415     * not be changed).
416     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
417     * </p>
418     * <p>
419     * The character array may be larger than the active area to be matched.
420     * Only values in the buffer between the specified indices may be accessed.
421     * </p>
422     * <p>
423     * The matching code may check one character or many.
424     * It may check characters preceding {@code pos} as well as those
425     * after, so long as no checks exceed the bounds specified.
426     * </p>
427     * <p>
428     * It must return zero for no match, or a positive number if a match was found.
429     * The number indicates the number of characters that matched.
430     * </p>
431     *
432     * @param buffer  the text content to match against, do not change
433     * @param pos  the starting position for the match, valid for buffer
434     * @param bufferStart  the first active index in the buffer, valid for buffer
435     * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
436     * @return the number of matching characters, zero for no match
437     */
438    public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);
439
440}