001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.lang3.text;
018
019import java.util.Arrays;
020
021import org.apache.commons.lang3.StringUtils;
022
023/**
024 * A matcher class that can be queried to determine if a character array
025 * portion matches.
026 * <p>
027 * This class comes complete with various factory methods.
028 * If these do not suffice, you can subclass and implement your own matcher.
029 *
030 * @since 2.2
031 * @deprecated as of 3.6, use commons-text
032 * <a href="https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/StringMatcherFactory.html">
033 * StringMatcherFactory</a> instead
034 */
035@Deprecated
public abstract class StrMatcher {

    /**
     * Matches the comma character.
     */
    private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');
    /**
     * Matches the tab character.
     */
    private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');
    /**
     * Matches the space character.
     */
    private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');
    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and formfeed.
     */
    private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());
    /**
     * Matches the String trim() whitespace characters.
     */
    private static final StrMatcher TRIM_MATCHER = new TrimMatcher();
    /**
     * Matches the single quote character.
     */
    private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');
    /**
     * Matches the double quote character.
     */
    private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');
    /**
     * Matches the single or double quote character.
     */
    private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());
    /**
     * Matches no characters.
     */
    private static final StrMatcher NONE_MATCHER = new NoMatcher();

    // -----------------------------------------------------------------------

    /**
     * Returns a matcher which matches the comma character.
     *
     * @return a matcher for a comma
     */
    public static StrMatcher commaMatcher() {
        return COMMA_MATCHER;
    }

    /**
     * Returns a matcher which matches the tab character.
     *
     * @return a matcher for a tab
     */
    public static StrMatcher tabMatcher() {
        return TAB_MATCHER;
    }

    /**
     * Returns a matcher which matches the space character.
     *
     * @return a matcher for a space
     */
    public static StrMatcher spaceMatcher() {
        return SPACE_MATCHER;
    }

    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and formfeed.
     *
     * @return the split matcher
     */
    public static StrMatcher splitMatcher() {
        return SPLIT_MATCHER;
    }

    /**
     * Matches the String trim() whitespace characters,
     * that is every character whose value is 32 (space) or lower.
     *
     * @return the trim matcher
     */
    public static StrMatcher trimMatcher() {
        return TRIM_MATCHER;
    }

    /**
     * Returns a matcher which matches the single quote character.
     *
     * @return a matcher for a single quote
     */
    public static StrMatcher singleQuoteMatcher() {
        return SINGLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the double quote character.
     *
     * @return a matcher for a double quote
     */
    public static StrMatcher doubleQuoteMatcher() {
        return DOUBLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the single or double quote character.
     *
     * @return a matcher for a single or double quote
     */
    public static StrMatcher quoteMatcher() {
        return QUOTE_MATCHER;
    }

    /**
     * Matches no characters.
     *
     * @return a matcher that matches nothing
     */
    public static StrMatcher noneMatcher() {
        return NONE_MATCHER;
    }

    /**
     * Creates a matcher that matches a single character.
     *
     * @param ch  the character to match
     * @return a new Matcher for the given char
     */
    public static StrMatcher charMatcher(final char ch) {
        return new CharMatcher(ch);
    }

    /**
     * Creates a matcher from a set of characters.
     * <p>
     * A {@code null} or empty array yields the shared "match nothing" matcher,
     * and a single element yields a single-character matcher.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given char[]
     */
    public static StrMatcher charSetMatcher(final char... chars) {
        if (chars == null || chars.length == 0) {
            return NONE_MATCHER;
        }
        if (chars.length == 1) {
            return new CharMatcher(chars[0]);
        }
        return new CharSetMatcher(chars);
    }

    /**
     * Creates a matcher from a string representing a set of characters.
     * <p>
     * A {@code null} or empty string yields the shared "match nothing" matcher,
     * and a one-character string yields a single-character matcher.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given characters
     */
    public static StrMatcher charSetMatcher(final String chars) {
        // Same null/empty guard as the char[] overload above.
        if (chars == null || chars.isEmpty()) {
            return NONE_MATCHER;
        }
        if (chars.length() == 1) {
            return new CharMatcher(chars.charAt(0));
        }
        return new CharSetMatcher(chars.toCharArray());
    }

    /**
     * Creates a matcher that matches an exact sequence of characters.
     *
     * @param str  the string to match, null or empty matches nothing
     * @return a matcher for the given String
     */
    public static StrMatcher stringMatcher(final String str) {
        if (str == null || str.isEmpty()) {
            return NONE_MATCHER;
        }
        return new StringMatcher(str);
    }

    //-----------------------------------------------------------------------
    /**
     * Constructor.
     */
    protected StrMatcher() {
    }

    /**
     * Returns the number of matching characters, zero for no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * <p>
     * The character array may be larger than the active area to be matched.
     * Only values in the buffer between the specified indices may be accessed.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those
     * after, so long as no checks exceed the bounds specified.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @param bufferStart  the first active index in the buffer, valid for buffer
     * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
     * @return the number of matching characters, zero for no match
     */
    public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);

    /**
     * Returns the number of matching characters, zero for no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * <p>
     * This is a convenience overload that treats the whole array as the active
     * area: it delegates to {@link #isMatch(char[], int, int, int)} with a start
     * of {@code 0} and an end of {@code buffer.length}.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those after.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @return the number of matching characters, zero for no match
     * @since 2.4
     */
    public int isMatch(final char[] buffer, final int pos) {
        return isMatch(buffer, pos, 0, buffer.length);
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a set of characters for matching purposes.
     */
    static final class CharSetMatcher extends StrMatcher {
        /** The set of characters to match, kept sorted so it can be binary-searched. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a character array.
         * <p>
         * The array is copied before being sorted, so the caller's array is
         * neither retained nor reordered.
         *
         * @param chars  the characters to match, must not be null
         */
        CharSetMatcher(final char[] chars) {
            this.chars = chars.clone();
            Arrays.sort(this.chars);
        }

        /**
         * Returns whether or not the character at {@code pos} is one of the set.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return 1 if the character is in the set, zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a character for matching purposes.
     */
    static final class CharMatcher extends StrMatcher {
        /** The character to match. */
        private final char ch;

        /**
         * Constructor that creates a matcher that matches a single character.
         *
         * @param ch  the character to match
         */
        CharMatcher(final char ch) {
            this.ch = ch;
        }

        /**
         * Returns whether or not the character at {@code pos} is the stored character.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return 1 if the character matches, zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return ch == buffer[pos] ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match an exact sequence of characters.
     */
    static final class StringMatcher extends StrMatcher {
        /** The string to match, as a character array. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a String.
         *
         * @param str  the string to match, must not be null
         */
        StringMatcher(final String str) {
            chars = str.toCharArray();
        }

        /**
         * Returns whether or not the given text matches the stored string.
         * <p>
         * The match fails if the stored string would extend past {@code bufferEnd}.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return the length of the stored string if it matches, zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            final int len = chars.length;
            // Compare against the room that is left instead of computing pos + len,
            // which could overflow int for very large positions.
            if (len > bufferEnd - pos) {
                return 0;
            }
            for (int i = 0; i < len; i++) {
                if (chars[i] != buffer[pos + i]) {
                    return 0;
                }
            }
            return len;
        }

        /**
         * Returns the default object description followed by the characters to match.
         *
         * @return a string describing this matcher
         */
        @Override
        public String toString() {
            return super.toString() + ' ' + Arrays.toString(chars);
        }

    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match no characters.
     */
    static final class NoMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code NoMatcher}.
         */
        NoMatcher() {
        }

        /**
         * Always returns zero, meaning "no match".
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return always zero
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match whitespace as per trim().
     */
    static final class TrimMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code TrimMatcher}.
         */
        TrimMatcher() {
        }

        /**
         * Returns whether or not the character at {@code pos} has a value
         * of 32 (space) or lower.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return 1 if the character is trimmable whitespace, zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return buffer[pos] <= 32 ? 1 : 0;
        }
    }

}