Rule.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      https://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. package org.apache.commons.codec.language.bm;

  18. import java.util.ArrayList;
  19. import java.util.Arrays;
  20. import java.util.Collections;
  21. import java.util.Comparator;
  22. import java.util.EnumMap;
  23. import java.util.HashMap;
  24. import java.util.HashSet;
  25. import java.util.List;
  26. import java.util.Map;
  27. import java.util.Scanner;
  28. import java.util.Set;
  29. import java.util.regex.Matcher;
  30. import java.util.regex.Pattern;

  31. import org.apache.commons.codec.Resources;
  32. import org.apache.commons.codec.language.bm.Languages.LanguageSet;

  33. /**
  34.  * A phoneme rule.
  35.  * <p>
  36.  * Rules have a pattern, left context, right context, output phoneme, set of languages for which they apply
  37.  * and a logical flag indicating if all languages must be in play. A rule matches if:
  38.  * </p>
  39.  * <ul>
  40.  * <li>the pattern matches at the current position</li>
  41.  * <li>the string up until the beginning of the pattern matches the left context</li>
  42.  * <li>the string from the end of the pattern matches the right context</li>
  43.  * <li>logical is ALL and all languages are in scope; or</li>
  44.  * <li>logical is any other value and at least one language is in scope</li>
  45.  * </ul>
  46.  * <p>
  47.  * Rules are typically generated by parsing rules resources. In normal use, there will be no need for the user
  48.  * to explicitly construct their own.
  49.  * </p>
  50.  * <p>
  51.  * Rules are immutable and thread-safe.
  52.  * </p>
  53.  * <h2>Rules resources</h2>
  54.  * <p>
  55.  * Rules are typically loaded from resource files. These are UTF-8 encoded text files. They are systematically
  56.  * named following the pattern:
  57.  * </p>
  58.  * <blockquote>/org/apache/commons/codec/language/bm/${NameType#getName}_${RuleType#getName}_${language}.txt</blockquote>
  59.  * <p>
  60.  * The format of these resources is the following:
  61.  * </p>
  62.  * <ul>
  63.  * <li><strong>Rules:</strong> whitespace separated, double-quoted strings. There should be 4 columns to each row, and these
  64.  * will be interpreted as:
  65.  * <ol>
  66.  * <li>pattern</li>
  67.  * <li>left context</li>
  68.  * <li>right context</li>
  69.  * <li>phoneme</li>
  70.  * </ol>
  71.  * </li>
  72.  * <li><strong>End-of-line comments:</strong> Any occurrence of '//' will cause all text following on that line to be discarded
  73.  * as a comment.</li>
  74.  * <li><strong>Multi-line comments:</strong> Any line starting with '/*' will start multi-line commenting mode. This will skip
  75.  * all content until a line ending in '*' and '/' is found.</li>
  76.  * <li><strong>Blank lines:</strong> All blank lines will be skipped.</li>
  77.  * </ul>
  78.  *
  79.  * @since 1.6
  80.  */
  81. public class Rule {

  82.     /**
  83.      * A phoneme.
  84.      */
  85.     public static final class Phoneme implements PhonemeExpr {

  86.         /**
  87.          * The Phoneme Comparator.
  88.          */
  89.         public static final Comparator<Phoneme> COMPARATOR = (o1, o2) -> {
  90.             final int o1Length = o1.phonemeText.length();
  91.             final int o2Length = o2.phonemeText.length();
  92.             for (int i = 0; i < o1Length; i++) {
  93.                 if (i >= o2Length) {
  94.                     return +1;
  95.                 }
  96.                 final int c = o1.phonemeText.charAt(i) - o2.phonemeText.charAt(i);
  97.                 if (c != 0) {
  98.                     return c;
  99.                 }
  100.             }

  101.             if (o1Length < o2Length) {
  102.                 return -1;
  103.             }

  104.             return 0;
  105.         };

  106.         private final StringBuilder phonemeText;

  107.         private final Languages.LanguageSet languages;

  108.         /**
  109.          * Constructs a new instance.
  110.          *
  111.          * @param phonemeText The phoneme text.
  112.          * @param languages A language set.
  113.          */
  114.         public Phoneme(final CharSequence phonemeText, final Languages.LanguageSet languages) {
  115.             this.phonemeText = new StringBuilder(phonemeText);
  116.             this.languages = languages;
  117.         }

  118.         /**
  119.          * Constructs a new instance.
  120.          *
  121.          * @param phonemeLeft The left phoneme text.
  122.          * @param phonemeRight The right phoneme text.
  123.          */
  124.         public Phoneme(final Phoneme phonemeLeft, final Phoneme phonemeRight) {
  125.             this(phonemeLeft.phonemeText, phonemeLeft.languages);
  126.             this.phonemeText.append(phonemeRight.phonemeText);
  127.         }

  128.         /**
  129.          * Constructs a new instance.
  130.          *
  131.          * @param phonemeLeft The left phoneme text.
  132.          * @param phonemeRight The right phoneme text.
  133.          * @param languages A language set.
  134.          */
  135.         public Phoneme(final Phoneme phonemeLeft, final Phoneme phonemeRight, final Languages.LanguageSet languages) {
  136.             this(phonemeLeft.phonemeText, languages);
  137.             this.phonemeText.append(phonemeRight.phonemeText);
  138.         }

  139.         /**
  140.          * Appends the sequence to the phone text.
  141.          *
  142.          * @param sequence The sequence to append.
  143.          * @return this instance.
  144.          */
  145.         public Phoneme append(final CharSequence sequence) {
  146.             this.phonemeText.append(sequence);
  147.             return this;
  148.         }

  149.         /**
  150.          * Gets the language set.
  151.          *
  152.          * @return the language set.
  153.          */
  154.         public Languages.LanguageSet getLanguages() {
  155.             return this.languages;
  156.         }

  157.         @Override
  158.         public Iterable<Phoneme> getPhonemes() {
  159.             return Collections.singleton(this);
  160.         }

  161.         /**
  162.          * Gets the phoneme text sequence.
  163.          *
  164.          * @return the phoneme text sequence.
  165.          */
  166.         public CharSequence getPhonemeText() {
  167.             return this.phonemeText;
  168.         }

  169.         /**
  170.          * Deprecated since 1.9.
  171.          *
  172.          * @param right the Phoneme to join
  173.          * @return a new Phoneme
  174.          * @deprecated since 1.9
  175.          */
  176.         @Deprecated
  177.         public Phoneme join(final Phoneme right) {
  178.             return new Phoneme(this.phonemeText.toString() + right.phonemeText.toString(),
  179.                                this.languages.restrictTo(right.languages));
  180.         }

  181.         /**
  182.          * Returns a new Phoneme with the same text but a union of its
  183.          * current language set and the given one.
  184.          *
  185.          * @param lang the language set to merge
  186.          * @return a new Phoneme
  187.          */
  188.         public Phoneme mergeWithLanguage(final LanguageSet lang) {
  189.           return new Phoneme(this.phonemeText.toString(), this.languages.merge(lang));
  190.         }

  191.         @Override
  192.         public int size() {
  193.             return 1;
  194.         }

  195.         @Override
  196.         public String toString() {
  197.           return phonemeText.toString() + "[" + languages + "]";
  198.         }
  199.     }

  200.     /**
  201.      * A phoneme expression.
  202.      */
  203.     public interface PhonemeExpr {

  204.         /**
  205.          * Gets an iteration of phonemes.
  206.          *
  207.          * @return an iteration of phonemes.
  208.          */
  209.         Iterable<Phoneme> getPhonemes();

  210.         /**
  211.          * Gets the expression size in phonemes.
  212.          *
  213.          * @return the expression size in phonemes.
  214.          * @since 1.17.0
  215.          */
  216.         default int size() {
  217.             // All implementations are int-bound.
  218.             return (int) Math.min(getPhonemes().spliterator().getExactSizeIfKnown(), Integer.MAX_VALUE);
  219.         }
  220.     }

  221.     /**
  222.      * A list of phonemes.
  223.      */
  224.     public static final class PhonemeList implements PhonemeExpr {

  225.         private final List<Phoneme> phonemeList;

  226.         /**
  227.          * Constructs a new instance.
  228.          *
  229.          * @param phonemes the phoneme list.
  230.          */
  231.         public PhonemeList(final List<Phoneme> phonemes) {
  232.             this.phonemeList = phonemes;
  233.         }

  234.         @Override
  235.         public List<Phoneme> getPhonemes() {
  236.             return phonemeList;
  237.         }

  238.         @Override
  239.         public int size() {
  240.             return phonemeList.size();
  241.         }
  242.     }

    /**
     * A minimal wrapper around the functionality of Pattern that we use, to allow for alternate implementations.
     * <p>
     * It declares a single abstract method, so instances can be written as lambdas, as done for
     * {@code ALL_STRINGS_RMATCHER} and inside {@code pattern(String)}.
     * </p>
     */
    public interface RPattern {

        /**
         * Tests whether the given input matches this instance.
         *
         * @param input the input to test.
         * @return whether the given input matches this instance.
         */
        boolean isMatch(CharSequence input);
    }

    /**
     * Always matches: returns {@code true} for every input.
     */
    public static final RPattern ALL_STRINGS_RMATCHER = input -> true;

    /**
     * Unused.
     */
    public static final String ALL = "ALL";

    /** The quote character that {@code stripQuotes} removes from the ends of a rule column. */
    private static final String DOUBLE_QUOTE = "\"";

    /** Prefix of a line that {@code parseRules} treats as an include directive. */
    private static final String HASH_INCLUDE = "#include";

    /** Length of {@link #HASH_INCLUDE}; used to cut the directive off the line and keep the included name. */
    private static final int HASH_INCLUDE_LENGTH = HASH_INCLUDE.length();

    /**
     * All rules loaded by the static initializer, keyed in order by name type, rule type, language name
     * (plus {@code "common"} for rule types other than {@code RuleType.RULES}) and finally the first
     * character of the rule pattern.
     */
    private static final Map<NameType, Map<RuleType, Map<String, Map<String, List<Rule>>>>> RULES =
            new EnumMap<>(NameType.class);

  268.     static {
  269.         for (final NameType s : NameType.values()) {
  270.             final Map<RuleType, Map<String, Map<String, List<Rule>>>> rts =
  271.                     new EnumMap<>(RuleType.class);

  272.             for (final RuleType rt : RuleType.values()) {
  273.                 final Map<String, Map<String, List<Rule>>> rs = new HashMap<>();

  274.                 final Languages ls = Languages.getInstance(s);
  275.                 ls.getLanguages().forEach(l -> {
  276.                     try (Scanner scanner = createScanner(s, rt, l)) {
  277.                         rs.put(l, parseRules(scanner, createResourceName(s, rt, l)));
  278.                     } catch (final IllegalStateException e) {
  279.                         throw new IllegalStateException("Problem processing " + createResourceName(s, rt, l), e);
  280.                     }
  281.                 });
  282.                 if (!rt.equals(RuleType.RULES)) {
  283.                     try (Scanner scanner = createScanner(s, rt, "common")) {
  284.                         rs.put("common", parseRules(scanner, createResourceName(s, rt, "common")));
  285.                     }
  286.                 }

  287.                 rts.put(rt, Collections.unmodifiableMap(rs));
  288.             }

  289.             RULES.put(s, Collections.unmodifiableMap(rts));
  290.         }
  291.     }

  292.     private static boolean contains(final CharSequence chars, final char input) {
  293.         return chars.chars().anyMatch(c -> c == input);
  294.     }

  295.     private static String createResourceName(final NameType nameType, final RuleType rt, final String lang) {
  296.         return String.format("/org/apache/commons/codec/language/bm/%s_%s_%s.txt",
  297.                              nameType.getName(), rt.getName(), lang);
  298.     }

  299.     @SuppressWarnings("resource") // Closing the Scanner closes the resource
  300.     private static Scanner createScanner(final NameType nameType, final RuleType rt, final String lang) {
  301.         final String resName = createResourceName(nameType, rt, lang);
  302.         return new Scanner(Resources.getInputStream(resName), ResourceConstants.ENCODING);
  303.     }

  304.     @SuppressWarnings("resource") // Closing the Scanner closes the resource
  305.     private static Scanner createScanner(final String lang) {
  306.         final String resName = String.format("/org/apache/commons/codec/language/bm/%s.txt", lang);
  307.         return new Scanner(Resources.getInputStream(resName), ResourceConstants.ENCODING);
  308.     }

  309.     private static boolean endsWith(final CharSequence input, final CharSequence suffix) {
  310.         final int suffixLength = suffix.length();
  311.         final int inputLength = input.length();

  312.         if (suffixLength > inputLength) {
  313.             return false;
  314.         }
  315.         for (int i = inputLength - 1, j = suffixLength - 1; j >= 0; i--, j--) {
  316.             if (input.charAt(i) != suffix.charAt(j)) {
  317.                 return false;
  318.             }
  319.         }
  320.         return true;
  321.     }

  322.     /**
  323.      * Gets rules for a combination of name type, rule type and languages.
  324.      *
  325.      * @param nameType
  326.      *            the NameType to consider
  327.      * @param rt
  328.      *            the RuleType to consider
  329.      * @param langs
  330.      *            the set of languages to consider
  331.      * @return a list of Rules that apply
  332.      */
  333.     public static List<Rule> getInstance(final NameType nameType, final RuleType rt,
  334.                                          final Languages.LanguageSet langs) {
  335.         final Map<String, List<Rule>> ruleMap = getInstanceMap(nameType, rt, langs);
  336.         final List<Rule> allRules = new ArrayList<>();
  337.         ruleMap.values().forEach(rules -> allRules.addAll(rules));
  338.         return allRules;
  339.     }

  340.     /**
  341.      * Gets rules for a combination of name type, rule type and a single language.
  342.      *
  343.      * @param nameType
  344.      *            the NameType to consider
  345.      * @param rt
  346.      *            the RuleType to consider
  347.      * @param lang
  348.      *            the language to consider
  349.      * @return a list of Rules that apply
  350.      */
  351.     public static List<Rule> getInstance(final NameType nameType, final RuleType rt, final String lang) {
  352.         return getInstance(nameType, rt, LanguageSet.from(new HashSet<>(Arrays.asList(lang))));
  353.     }

  354.     /**
  355.      * Gets rules for a combination of name type, rule type and languages.
  356.      *
  357.      * @param nameType
  358.      *            the NameType to consider
  359.      * @param rt
  360.      *            the RuleType to consider
  361.      * @param langs
  362.      *            the set of languages to consider
  363.      * @return a map containing all Rules that apply, grouped by the first character of the rule pattern
  364.      * @since 1.9
  365.      */
  366.     public static Map<String, List<Rule>> getInstanceMap(final NameType nameType, final RuleType rt,
  367.                                                          final Languages.LanguageSet langs) {
  368.         return langs.isSingleton() ? getInstanceMap(nameType, rt, langs.getAny()) :
  369.                                      getInstanceMap(nameType, rt, Languages.ANY);
  370.     }

  371.     /**
  372.      * Gets rules for a combination of name type, rule type and a single language.
  373.      *
  374.      * @param nameType
  375.      *            the NameType to consider
  376.      * @param rt
  377.      *            the RuleType to consider
  378.      * @param lang
  379.      *            the language to consider
  380.      * @return a map containing all Rules that apply, grouped by the first character of the rule pattern
  381.      * @since 1.9
  382.      */
  383.     public static Map<String, List<Rule>> getInstanceMap(final NameType nameType, final RuleType rt,
  384.                                                          final String lang) {
  385.         final Map<String, List<Rule>> rules = RULES.get(nameType).get(rt).get(lang);

  386.         if (rules == null) {
  387.             throw new IllegalArgumentException(String.format("No rules found for %s, %s, %s.",
  388.                                                nameType.getName(), rt.getName(), lang));
  389.         }

  390.         return rules;
  391.     }

  392.     private static Phoneme parsePhoneme(final String ph) {
  393.         final int open = ph.indexOf("[");
  394.         if (open >= 0) {
  395.             if (!ph.endsWith("]")) {
  396.                 throw new IllegalArgumentException("Phoneme expression contains a '[' but does not end in ']'");
  397.             }
  398.             final String before = ph.substring(0, open);
  399.             final String in = ph.substring(open + 1, ph.length() - 1);
  400.             final Set<String> langs = new HashSet<>(Arrays.asList(in.split("[+]")));

  401.             return new Phoneme(before, Languages.LanguageSet.from(langs));
  402.         }
  403.         return new Phoneme(ph, Languages.ANY_LANGUAGE);
  404.     }

  405.     private static PhonemeExpr parsePhonemeExpr(final String ph) {
  406.         if (ph.startsWith("(")) { // we have a bracketed list of options
  407.             if (!ph.endsWith(")")) {
  408.                 throw new IllegalArgumentException("Phoneme starts with '(' so must end with ')'");
  409.             }

  410.             final List<Phoneme> phs = new ArrayList<>();
  411.             final String body = ph.substring(1, ph.length() - 1);
  412.             for (final String part : body.split("[|]")) {
  413.                 phs.add(parsePhoneme(part));
  414.             }
  415.             if (body.startsWith("|") || body.endsWith("|")) {
  416.                 phs.add(new Phoneme("", Languages.ANY_LANGUAGE));
  417.             }

  418.             return new PhonemeList(phs);
  419.         }
  420.         return parsePhoneme(ph);
  421.     }

  422.     private static Map<String, List<Rule>> parseRules(final Scanner scanner, final String location) {
  423.         final Map<String, List<Rule>> lines = new HashMap<>();
  424.         int currentLine = 0;

  425.         boolean inMultilineComment = false;
  426.         while (scanner.hasNextLine()) {
  427.             currentLine++;
  428.             final String rawLine = scanner.nextLine();
  429.             String line = rawLine;

  430.             if (inMultilineComment) {
  431.                 if (line.endsWith(ResourceConstants.EXT_CMT_END)) {
  432.                     inMultilineComment = false;
  433.                 }
  434.             } else if (line.startsWith(ResourceConstants.EXT_CMT_START)) {
  435.                 inMultilineComment = true;
  436.             } else {
  437.                 // discard comments
  438.                 final int cmtI = line.indexOf(ResourceConstants.CMT);
  439.                 if (cmtI >= 0) {
  440.                     line = line.substring(0, cmtI);
  441.                 }

  442.                 // trim leading-trailing whitespace
  443.                 line = line.trim();

  444.                 if (line.isEmpty()) {
  445.                     continue; // empty lines can be safely skipped
  446.                 }

  447.                 if (line.startsWith(HASH_INCLUDE)) {
  448.                     // include statement
  449.                     final String incl = line.substring(HASH_INCLUDE_LENGTH).trim();
  450.                     if (incl.contains(" ")) {
  451.                         throw new IllegalArgumentException("Malformed import statement '" + rawLine + "' in " +
  452.                                                            location);
  453.                     }
  454.                     try (Scanner hashIncludeScanner = createScanner(incl)) {
  455.                         lines.putAll(parseRules(hashIncludeScanner, location + "->" + incl));
  456.                     }
  457.                 } else {
  458.                     // rule
  459.                     final String[] parts = line.split("\\s+");
  460.                     if (parts.length != 4) {
  461.                         throw new IllegalArgumentException("Malformed rule statement split into " + parts.length +
  462.                                                            " parts: " + rawLine + " in " + location);
  463.                     }
  464.                     try {
  465.                         final String pat = stripQuotes(parts[0]);
  466.                         final String lCon = stripQuotes(parts[1]);
  467.                         final String rCon = stripQuotes(parts[2]);
  468.                         final PhonemeExpr ph = parsePhonemeExpr(stripQuotes(parts[3]));
  469.                         final int cLine = currentLine;
  470.                         final Rule r = new Rule(pat, lCon, rCon, ph) {
  471.                             private final int myLine = cLine;
  472.                             private final String loc = location;

  473.                             @Override
  474.                             public String toString() {
  475.                                 final StringBuilder sb = new StringBuilder();
  476.                                 sb.append("Rule");
  477.                                 sb.append("{line=").append(myLine);
  478.                                 sb.append(", loc='").append(loc).append('\'');
  479.                                 sb.append(", pat='").append(pat).append('\'');
  480.                                 sb.append(", lcon='").append(lCon).append('\'');
  481.                                 sb.append(", rcon='").append(rCon).append('\'');
  482.                                 sb.append('}');
  483.                                 return sb.toString();
  484.                             }
  485.                         };
  486.                         final String patternKey = r.pattern.substring(0, 1);
  487.                         final List<Rule> rules = lines.computeIfAbsent(patternKey, k -> new ArrayList<>());
  488.                         rules.add(r);
  489.                     } catch (final IllegalArgumentException e) {
  490.                         throw new IllegalStateException("Problem parsing line '" + currentLine + "' in " +
  491.                                                         location, e);
  492.                     }
  493.                 }
  494.             }
  495.         }

  496.         return lines;
  497.     }

  498.     /**
  499.      * Attempts to compile the regex into direct string ops, falling back to Pattern and Matcher in the worst case.
  500.      *
  501.      * @param regex
  502.      *            the regular expression to compile
  503.      * @return an RPattern that will match this regex
  504.      */
  505.     private static RPattern pattern(final String regex) {
  506.         final boolean startsWith = regex.startsWith("^");
  507.         final boolean endsWith = regex.endsWith("$");
  508.         final String content = regex.substring(startsWith ? 1 : 0, endsWith ? regex.length() - 1 : regex.length());
  509.         final boolean boxes = content.contains("[");

  510.         if (!boxes) {
  511.             if (startsWith && endsWith) {
  512.                 // exact match
  513.                 if (content.isEmpty()) {
  514.                     // empty
  515.                     return input -> input.length() == 0;
  516.                 }
  517.                 return input -> input.equals(content);
  518.             }
  519.             if ((startsWith || endsWith) && content.isEmpty()) {
  520.                 // matches every string
  521.                 return ALL_STRINGS_RMATCHER;
  522.             }
  523.             if (startsWith) {
  524.                 // matches from start
  525.                 return input -> startsWith(input, content);
  526.             }
  527.             if (endsWith) {
  528.                 // matches from start
  529.                 return input -> endsWith(input, content);
  530.             }
  531.         } else {
  532.             final boolean startsWithBox = content.startsWith("[");
  533.             final boolean endsWithBox = content.endsWith("]");

  534.             if (startsWithBox && endsWithBox) {
  535.                 String boxContent = content.substring(1, content.length() - 1);
  536.                 if (!boxContent.contains("[")) {
  537.                     // box containing alternatives
  538.                     final boolean negate = boxContent.startsWith("^");
  539.                     if (negate) {
  540.                         boxContent = boxContent.substring(1);
  541.                     }
  542.                     final String bContent = boxContent;
  543.                     final boolean shouldMatch = !negate;

  544.                     if (startsWith && endsWith) {
  545.                         // exact match
  546.                         return input -> input.length() == 1 && contains(bContent, input.charAt(0)) == shouldMatch;
  547.                     }
  548.                     if (startsWith) {
  549.                         // first char
  550.                         return input -> input.length() > 0 && contains(bContent, input.charAt(0)) == shouldMatch;
  551.                     }
  552.                     if (endsWith) {
  553.                         // last char
  554.                         return input -> input.length() > 0 &&
  555.                                contains(bContent, input.charAt(input.length() - 1)) == shouldMatch;
  556.                     }
  557.                 }
  558.             }
  559.         }

  560.         return new RPattern() {
  561.             final Pattern pattern = Pattern.compile(regex);

  562.             @Override
  563.             public boolean isMatch(final CharSequence input) {
  564.                 final Matcher matcher = pattern.matcher(input);
  565.                 return matcher.find();
  566.             }
  567.         };
  568.     }

  569.     private static boolean startsWith(final CharSequence input, final CharSequence prefix) {
  570.         if (prefix.length() > input.length()) {
  571.             return false;
  572.         }
  573.         for (int i = 0; i < prefix.length(); i++) {
  574.             if (input.charAt(i) != prefix.charAt(i)) {
  575.                 return false;
  576.             }
  577.         }
  578.         return true;
  579.     }

  580.     private static String stripQuotes(String str) {
  581.         if (str.startsWith(DOUBLE_QUOTE)) {
  582.             str = str.substring(1);
  583.         }

  584.         if (str.endsWith(DOUBLE_QUOTE)) {
  585.             str = str.substring(0, str.length() - 1);
  586.         }

  587.         return str;
  588.     }

    /** Matcher applied to the input before the pattern position; built from the left context followed by {@code $}. */
    private final RPattern lContext;

    /** The literal text that must occur at the tested position. */
    private final String pattern;

    /** The phoneme expression associated with this rule. */
    private final PhonemeExpr phoneme;

    /** Matcher applied to the input after the pattern; built from {@code ^} followed by the right context. */
    private final RPattern rContext;

  593.     /**
  594.      * Creates a new rule.
  595.      *
  596.      * @param pattern
  597.      *            the pattern
  598.      * @param lContext
  599.      *            the left context
  600.      * @param rContext
  601.      *            the right context
  602.      * @param phoneme
  603.      *            the resulting phoneme
  604.      */
  605.     public Rule(final String pattern, final String lContext, final String rContext, final PhonemeExpr phoneme) {
  606.         this.pattern = pattern;
  607.         this.lContext = pattern(lContext + "$");
  608.         this.rContext = pattern("^" + rContext);
  609.         this.phoneme = phoneme;
  610.     }

  611.     /**
  612.      * Gets the left context. This is a regular expression that must match to the left of the pattern.
  613.      *
  614.      * @return the left context Pattern
  615.      */
  616.     public RPattern getLContext() {
  617.         return this.lContext;
  618.     }

  619.     /**
  620.      * Gets the pattern. This is a string-literal that must exactly match.
  621.      *
  622.      * @return the pattern
  623.      */
  624.     public String getPattern() {
  625.         return this.pattern;
  626.     }

  627.     /**
  628.      * Gets the phoneme. If the rule matches, this is the phoneme associated with the pattern match.
  629.      *
  630.      * @return the phoneme
  631.      */
  632.     public PhonemeExpr getPhoneme() {
  633.         return this.phoneme;
  634.     }

  635.     /**
  636.      * Gets the right context. This is a regular expression that must match to the right of the pattern.
  637.      *
  638.      * @return the right context Pattern
  639.      */
  640.     public RPattern getRContext() {
  641.         return this.rContext;
  642.     }

  643.     /**
  644.      * Decides if the pattern and context match the input starting at a position. It is a match if the
  645.      * {@code lContext} matches {@code input} up to {@code i}, {@code pattern} matches at i and
  646.      * {@code rContext} matches from the end of the match of {@code pattern} to the end of {@code input}.
  647.      *
  648.      * @param input
  649.      *            the input String
  650.      * @param i
  651.      *            the int position within the input
  652.      * @return true if the pattern and left/right context match, false otherwise
  653.      */
  654.     public boolean patternAndContextMatches(final CharSequence input, final int i) {
  655.         if (i < 0) {
  656.             throw new IndexOutOfBoundsException("Can not match pattern at negative indexes");
  657.         }

  658.         final int patternLength = this.pattern.length();
  659.         final int ipl = i + patternLength;

  660.         if (ipl > input.length()) {
  661.             // not enough room for the pattern to match
  662.             return false;
  663.         }

  664.         // evaluate the pattern, left context and right context
  665.         // fail early if any of the evaluations is not successful
  666.         if (!input.subSequence(i, ipl).equals(this.pattern)) {
  667.             return false;
  668.         }
  669.         if (!this.rContext.isMatch(input.subSequence(ipl, input.length()))) {
  670.             return false;
  671.         }
  672.         return this.lContext.isMatch(input.subSequence(0, i));
  673.     }
  674. }