View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   https://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.bcel.util;
20  
21  import java.util.ArrayList;
22  import java.util.Arrays;
23  import java.util.HashMap;
24  import java.util.Iterator;
25  import java.util.List;
26  import java.util.Map;
27  import java.util.regex.Matcher;
28  import java.util.regex.Pattern;
29  
30  import org.apache.bcel.Const;
31  import org.apache.bcel.generic.ClassGenException;
32  import org.apache.bcel.generic.InstructionHandle;
33  import org.apache.bcel.generic.InstructionList;
34  import org.apache.commons.lang3.StringUtils;
35  
36  /**
37   * InstructionFinder is a tool to search for given instruction patterns, i.e., match sequences of instructions in an
38   * instruction list via regular expressions. This can be used, for example, in order to implement a peephole optimizer that
39   * looks for code patterns and replaces them with faster equivalents.
40   *
41   * <p>
42   * This class internally uses the java.util.regex package to search for regular expressions.
43   *
44   * A typical application would look like this:
45   *
46   * <pre>
47   *
48   *
49   *   InstructionFinder f   = new InstructionFinder(il);
50   *   String            pat = &quot;IfInstruction ICONST_0 GOTO ICONST_1 NOP (IFEQ|IFNE)&quot;;
51   *
52   *   for (Iterator&lt;InstructionHandle[]&gt; i = f.search(pat, constraint); i.hasNext(); ) {
53   *   InstructionHandle[] match = i.next();
54   *   ...
55   *   il.delete(match[1], match[5]);
56   *   ...
57   *   }
58   *
59   *
60   * </pre>
61   *
62   * @see org.apache.bcel.generic.Instruction
63   * @see InstructionList
64   */
65  public class InstructionFinder {
66  
67      /**
68       * Code patterns found may be checked using an additional user-defined constraint object whether they really match the
69       * needed criterion. I.e., check constraints that cannot expressed with regular expressions.
70       */
71      public interface CodeConstraint {
72  
73          /**
74           * @param match array of instructions matching the requested pattern
75           * @return true if the matched area is really useful
76           */
77          boolean checkCode(InstructionHandle[] match);
78      }
79  
80      private static final int OFFSET = 32767; // char + OFFSET is outside of LATIN-1
81      private static final int NO_OPCODES = 256; // Potential number, some are not used
82      private static final Map<String, String> map = new HashMap<>();
83  
// Initialize pattern map
static {
    // Instruction groups, written with symbolic opcode names; they are compiled further below.
    map.put("arithmeticinstruction",
        "(irem|lrem|iand|ior|ineg|isub|lneg|fneg|fmul|ldiv|fadd|lxor|frem|idiv|land|ixor|ishr|fsub|lshl|fdiv|iadd|lor|dmul|lsub|ishl|imul|lmul|lushr|dneg|iushr|lshr|ddiv|drem|dadd|ladd|dsub)");
    map.put("invokeinstruction", "(invokevirtual|invokeinterface|invokestatic|invokespecial|invokedynamic)");
    map.put("arrayinstruction",
        "(baload|aastore|saload|caload|fastore|lastore|iaload|castore|iastore|aaload|bastore|sastore|faload|laload|daload|dastore)");
    map.put("gotoinstruction", "(goto|goto_w)");
    map.put("conversioninstruction", "(d2l|l2d|i2s|d2i|l2i|i2b|l2f|d2f|f2i|i2d|i2l|f2d|i2c|f2l|i2f)");
    map.put("localvariableinstruction", "(fstore|iinc|lload|dstore|dload|iload|aload|astore|istore|fload|lstore)");
    map.put("loadinstruction", "(fload|dload|lload|iload|aload)");
    map.put("fieldinstruction", "(getfield|putstatic|getstatic|putfield)");
    map.put("cpinstruction",
        "(ldc2_w|invokeinterface|invokedynamic|multianewarray|putstatic|instanceof|getstatic|checkcast|getfield|invokespecial|ldc_w|invokestatic|invokevirtual|putfield|ldc|new|anewarray)");
    map.put("stackinstruction", "(dup2|swap|dup2_x2|pop|pop2|dup|dup2_x1|dup_x2|dup_x1)");
    map.put("branchinstruction",
        "(ifle|if_acmpne|if_icmpeq|if_acmpeq|ifnonnull|goto_w|iflt|ifnull|if_icmpne|tableswitch|if_icmple|ifeq|if_icmplt|jsr_w|if_icmpgt|ifgt|jsr|goto|ifne|ifge|lookupswitch|if_icmpge)");
    map.put("returninstruction", "(lreturn|ireturn|freturn|dreturn|areturn|return)");
    map.put("storeinstruction", "(istore|fstore|dstore|astore|lstore)");
    map.put("select", "(tableswitch|lookupswitch)");
    map.put("ifinstruction",
        "(ifeq|ifgt|if_icmpne|if_icmpeq|ifge|ifnull|ifne|if_icmple|if_icmpge|if_acmpeq|if_icmplt|if_acmpne|ifnonnull|iflt|if_icmpgt|ifle)");
    map.put("jsrinstruction", "(jsr|jsr_w)");
    map.put("variablelengthinstruction", "(tableswitch|jsr|goto|lookupswitch)");
    map.put("unconditionalbranch", "(goto|jsr|jsr_w|athrow|goto_w)");
    map.put("constantpushinstruction", "(dconst|bipush|sipush|fconst|iconst|lconst)");
    map.put("typedinstruction",
        "(imul|lsub|aload|fload|lor|new|aaload|fcmpg|iand|iaload|lrem|idiv|d2l|isub|dcmpg|dastore|ret|f2d|f2i|drem|iinc|i2c|checkcast|frem|lreturn|astore|lushr|daload|dneg|fastore|istore|lshl|ldiv|lstore|areturn|ishr|ldc_w|invokeinterface|invokedynamic|aastore|lxor|ishl|l2d|i2f|return|faload|sipush|iushr|caload|instanceof|invokespecial|putfield|fmul|ireturn|laload|d2f|lneg|ixor|i2l|fdiv|lastore|multianewarray|i2b|getstatic|i2d|putstatic|fcmpl|saload|ladd|irem|dload|jsr_w|dconst|dcmpl|fsub|freturn|ldc|aconst_null|castore|lmul|ldc2_w|dadd|iconst|f2l|ddiv|dstore|land|jsr|anewarray|dmul|bipush|dsub|sastore|d2i|i2s|lshr|iadd|l2i|lload|bastore|fstore|fneg|iload|fadd|baload|fconst|ior|ineg|dreturn|l2f|lconst|getfield|invokevirtual|invokestatic|iastore)");
    map.put("popinstruction", "(fstore|dstore|pop|pop2|astore|putstatic|istore|lstore)");
    map.put("allocationinstruction", "(multianewarray|new|anewarray|newarray)");
    map.put("indexedinstruction",
        "(lload|lstore|fload|ldc2_w|invokeinterface|invokedynamic|multianewarray|astore|dload|putstatic|instanceof|getstatic|checkcast|getfield|invokespecial|dstore|istore|iinc|ldc_w|ret|fstore|invokestatic|iload|putfield|invokevirtual|ldc|new|aload|anewarray)");
    map.put("pushinstruction", "(dup|lload|dup2|bipush|fload|ldc2_w|sipush|lconst|fconst|dload|getstatic|ldc_w|aconst_null|dconst|iload|ldc|iconst|aload)");
    map.put("stackproducer",
        "(imul|lsub|aload|fload|lor|new|aaload|fcmpg|iand|iaload|lrem|idiv|d2l|isub|dcmpg|dup|f2d|f2i|drem|i2c|checkcast|frem|lushr|daload|dneg|lshl|ldiv|ishr|ldc_w|invokeinterface|invokedynamic|lxor|ishl|l2d|i2f|faload|sipush|iushr|caload|instanceof|invokespecial|fmul|laload|d2f|lneg|ixor|i2l|fdiv|getstatic|i2b|swap|i2d|dup2|fcmpl|saload|ladd|irem|dload|jsr_w|dconst|dcmpl|fsub|ldc|arraylength|aconst_null|tableswitch|lmul|ldc2_w|iconst|dadd|f2l|ddiv|land|jsr|anewarray|dmul|bipush|dsub|d2i|newarray|i2s|lshr|iadd|lload|l2i|fneg|iload|fadd|baload|fconst|lookupswitch|ior|ineg|lconst|l2f|getfield|invokevirtual|invokestatic)");
    map.put("stackconsumer",
        "(imul|lsub|lor|iflt|fcmpg|if_icmpgt|iand|ifeq|if_icmplt|lrem|ifnonnull|idiv|d2l|isub|dcmpg|dastore|if_icmpeq|f2d|f2i|drem|i2c|checkcast|frem|lreturn|astore|lushr|pop2|monitorexit|dneg|fastore|istore|lshl|ldiv|lstore|areturn|if_icmpge|ishr|monitorenter|invokeinterface|invokedynamic|aastore|lxor|ishl|l2d|i2f|return|iushr|instanceof|invokespecial|fmul|ireturn|d2f|lneg|ixor|pop|i2l|ifnull|fdiv|lastore|i2b|if_acmpeq|ifge|swap|i2d|putstatic|fcmpl|ladd|irem|dcmpl|fsub|freturn|ifgt|castore|lmul|dadd|f2l|ddiv|dstore|land|if_icmpne|if_acmpne|dmul|dsub|sastore|ifle|d2i|i2s|lshr|iadd|l2i|bastore|fstore|fneg|fadd|ior|ineg|ifne|dreturn|l2f|if_icmple|getfield|invokevirtual|invokestatic|iastore)");
    map.put("exceptionthrower",
        "(irem|lrem|laload|putstatic|baload|dastore|areturn|getstatic|ldiv|anewarray|iastore|castore|idiv|saload|lastore|fastore|putfield|lreturn|caload|getfield|return|aastore|freturn|newarray|instanceof|multianewarray|athrow|faload|iaload|aaload|dreturn|monitorenter|checkcast|bastore|arraylength|new|invokevirtual|sastore|ldc_w|ireturn|invokespecial|monitorexit|invokeinterface|invokedynamic|ldc|invokestatic|daload)");
    map.put("loadclass",
        "(multianewarray|invokeinterface|invokedynamic|instanceof|invokespecial|putfield|checkcast|putstatic|invokevirtual|new|getstatic|invokestatic|getfield|anewarray)");
    map.put("instructiontargeter",
        "(ifle|if_acmpne|if_icmpeq|if_acmpeq|ifnonnull|goto_w|iflt|ifnull|if_icmpne|tableswitch|if_icmple|ifeq|if_icmplt|jsr_w|if_icmpgt|ifgt|jsr|goto|ifne|ifge|lookupswitch|if_icmpge)");
    // Some aliases
    map.put("if_icmp", "(if_icmpne|if_icmpeq|if_icmple|if_icmpge|if_icmplt|if_icmpgt)");
    map.put("if_acmp", "(if_acmpeq|if_acmpne)");
    map.put("if", "(ifeq|ifne|iflt|ifge|ifgt|ifle)");
    // Precompile some aliases first: the group strings above refer to these names, so they must already
    // be in encoded form when the groups are compiled.
    map.put("iconst", precompile(Const.ICONST_0, Const.ICONST_5, Const.ICONST_M1));
    map.put("lconst", new String(new char[] {'(', makeChar(Const.LCONST_0), '|', makeChar(Const.LCONST_1), ')'}));
    map.put("dconst", new String(new char[] {'(', makeChar(Const.DCONST_0), '|', makeChar(Const.DCONST_1), ')'}));
    map.put("fconst", new String(new char[] {'(', makeChar(Const.FCONST_0), '|', makeChar(Const.FCONST_1), '|', makeChar(Const.FCONST_2), ')'}));
    map.put("lload", precompile(Const.LLOAD_0, Const.LLOAD_3, Const.LLOAD));
    map.put("iload", precompile(Const.ILOAD_0, Const.ILOAD_3, Const.ILOAD));
    map.put("dload", precompile(Const.DLOAD_0, Const.DLOAD_3, Const.DLOAD));
    map.put("fload", precompile(Const.FLOAD_0, Const.FLOAD_3, Const.FLOAD));
    map.put("aload", precompile(Const.ALOAD_0, Const.ALOAD_3, Const.ALOAD));
    map.put("lstore", precompile(Const.LSTORE_0, Const.LSTORE_3, Const.LSTORE));
    map.put("istore", precompile(Const.ISTORE_0, Const.ISTORE_3, Const.ISTORE));
    map.put("dstore", precompile(Const.DSTORE_0, Const.DSTORE_3, Const.DSTORE));
    map.put("fstore", precompile(Const.FSTORE_0, Const.FSTORE_3, Const.FSTORE));
    map.put("astore", precompile(Const.ASTORE_0, Const.ASTORE_3, Const.ASTORE));
    // Compile strings. An entry whose second character is already an encoded opcode (>= OFFSET) was
    // precompiled above and is kept unchanged. replaceAll rewrites the values in place, so the map is
    // not modified through put() while it is being iterated.
    map.replaceAll((key, value) -> value.charAt(1) < OFFSET ? compilePattern(value) : value);
    // Add instruction alias to match anything
    final StringBuilder buf = new StringBuilder("(");
    for (short i = 0; i < NO_OPCODES; i++) {
        if (Const.getNoOfOperands(i) != Const.UNDEFINED) { // Not an invalid opcode
            // Write the separator before every alternative but the first, so the group can never end
            // in '|' (an empty alternative) regardless of which opcode is the last valid one.
            if (buf.length() > 1) {
                buf.append('|');
            }
            buf.append(makeChar(i));
        }
    }
    buf.append(')');
    map.put("instruction", buf.toString());
}
166 
167     /**
168      * Replace symbolic names of instructions with the appropriate character and remove all white space from string. Meta
169      * characters such as +, * are ignored.
170      *
171      * @param pattern The pattern to compile
172      * @return translated regular expression string
173      */
174     private static String compilePattern(final String pattern) {
175         // Bug: BCEL-77 - Instructions are assumed to be english, to avoid odd Locale issues
176         final String lower = StringUtils.toRootLowerCase(pattern);
177         final StringBuilder buf = new StringBuilder();
178         final int size = pattern.length();
179         for (int i = 0; i < size; i++) {
180             char ch = lower.charAt(i);
181             if (Character.isLetterOrDigit(ch)) {
182                 final StringBuilder name = new StringBuilder();
183                 while ((Character.isLetterOrDigit(ch) || ch == '_') && i < size) {
184                     name.append(ch);
185                     if (++i >= size) {
186                         break;
187                     }
188                     ch = lower.charAt(i);
189                 }
190                 i--;
191                 buf.append(mapName(name.toString()));
192             } else if (!Character.isWhitespace(ch)) {
193                 buf.append(ch);
194             }
195         }
196         return buf.toString();
197     }
198 
199     /**
200      * Convert opcode number to char.
201      */
202     private static char makeChar(final short opcode) {
203         return (char) (opcode + OFFSET);
204     }
205 
206     /**
207      * Map symbolic instruction names like "getfield" to a single character.
208      *
209      * @param pattern instruction pattern in lower case
210      * @return encoded string for a pattern such as "BranchInstruction".
211      */
212     private static String mapName(final String pattern) {
213         final String result = map.get(pattern);
214         if (result != null) {
215             return result;
216         }
217         for (short i = 0; i < NO_OPCODES; i++) {
218             if (pattern.equals(Const.getOpcodeName(i))) {
219                 return String.valueOf(makeChar(i));
220             }
221         }
222         throw new IllegalArgumentException("Instruction unknown: " + pattern);
223     }
224 
225     private static String precompile(final short from, final short to, final short extra) {
226         final StringBuilder buf = new StringBuilder("(");
227         for (short i = from; i <= to; i++) {
228             buf.append(makeChar(i));
229             buf.append('|');
230         }
231         buf.append(makeChar(extra));
232         buf.append(")");
233         return buf.toString();
234     }
235 
236     private final InstructionList il;
237 
238     private String ilString; // instruction list as string
239 
240     private InstructionHandle[] handles; // map instruction
241 
242     // list to array
/**
 * Creates a finder for the given instruction list and reads the list once.
 *
 * @param il instruction list to search for given patterns
 */
public InstructionFinder(final InstructionList il) {
    this.il = il;
    this.reread();
}
250 
251     /**
252      * @return the inquired instruction list
253      */
254     public final InstructionList getInstructionList() {
255         return il;
256     }
257 
258     /**
259      * @return the matched piece of code as an array of instruction (handles)
260      */
261     private InstructionHandle[] getMatch(final int matchedFrom, final int matchLength) {
262         return Arrays.copyOfRange(handles, matchedFrom, matchedFrom + matchLength);
263     }
264 
265     /**
266      * Reread the instruction list, for example, after you've altered the list upon a match.
267      */
268     public final void reread() {
269         final int size = il.getLength();
270         final char[] buf = new char[size]; // Create a string with length equal to il length
271         handles = il.getInstructionHandles();
272         // Map opcodes to characters
273         for (int i = 0; i < size; i++) {
274             buf[i] = makeChar(handles[i].getInstruction().getOpcode());
275         }
276         ilString = new String(buf);
277     }
278 
279     /**
280      * Start search beginning from the start of the given instruction list.
281      *
282      * @param pattern the instruction pattern to search for, where case is ignored
283      * @return iterator of matches where e.nextElement() returns an array of instruction handles describing the matched area
284      */
285     public final Iterator<InstructionHandle[]> search(final String pattern) {
286         return search(pattern, il.getStart(), null);
287     }
288 
289     /**
290      * Start search beginning from the start of the given instruction list. Check found matches with the constraint object.
291      *
292      * @param pattern the instruction pattern to search for, case is ignored
293      * @param constraint constraints to be checked on matching code
294      * @return instruction handle or 'null' if the match failed
295      */
296     public final Iterator<InstructionHandle[]> search(final String pattern, final CodeConstraint constraint) {
297         return search(pattern, il.getStart(), constraint);
298     }
299 
300     /**
301      * Start search beginning from 'from'.
302      *
303      * @param pattern the instruction pattern to search for, where case is ignored
304      * @param from where to start the search in the instruction list
305      * @return iterator of matches where e.nextElement() returns an array of instruction handles describing the matched area
306      */
307     public final Iterator<InstructionHandle[]> search(final String pattern, final InstructionHandle from) {
308         return search(pattern, from, null);
309     }
310 
311     /**
312      * Search for the given pattern in the instruction list. You can search for any valid opcode via its symbolic name, for example
313      * "istore". You can also use a super class or an interface name to match a whole set of instructions, for example
314      * "BranchInstruction" or "LoadInstruction". "istore" is also an alias for all "istore_x" instructions. Additional
315      * aliases are "if" for "ifxx", "if_icmp" for "if_icmpxx", "if_acmp" for "if_acmpxx".
316      *
317      * Consecutive instruction names must be separated by white space which will be removed during the compilation of the
318      * pattern.
319      *
320      * For the rest the usual pattern matching rules for regular expressions apply.
321      * <P>
322      * Example pattern:
323      *
324      * <pre>
325      * search(&quot;BranchInstruction NOP ((IfInstruction|GOTO)+ ISTORE Instruction)*&quot;);
326      * </pre>
327      *
328      * <p>
329      * If you alter the instruction list upon a match such that other matching areas are affected, you should call reread()
330      * to update the finder and call search() again, because the matches are cached.
331      *
332      * @param pattern the instruction pattern to search for, where case is ignored
333      * @param from where to start the search in the instruction list
334      * @param constraint optional CodeConstraint to check the found code pattern for user-defined constraints
335      * @return iterator of matches where e.nextElement() returns an array of instruction handles describing the matched area
336      */
337     public final Iterator<InstructionHandle[]> search(final String pattern, final InstructionHandle from, final CodeConstraint constraint) {
338         final String search = compilePattern(pattern);
339         int start = -1;
340         for (int i = 0; i < handles.length; i++) {
341             if (handles[i] == from) {
342                 start = i; // Where to start search from (index)
343                 break;
344             }
345         }
346         if (start == -1) {
347             throw new ClassGenException("Instruction handle " + from + " not found in instruction list.");
348         }
349         final Pattern regex = Pattern.compile(search);
350         final List<InstructionHandle[]> matches = new ArrayList<>();
351         final Matcher matcher = regex.matcher(ilString);
352         while (start < ilString.length() && matcher.find(start)) {
353             final int startExpr = matcher.start();
354             final int endExpr = matcher.end();
355             final int lenExpr = endExpr - startExpr;
356             final InstructionHandle[] match = getMatch(startExpr, lenExpr);
357             if (constraint == null || constraint.checkCode(match)) {
358                 matches.add(match);
359             }
360             start = endExpr;
361         }
362         return matches.iterator();
363     }
364 
365     /*
366      * Internal debugging routines.
367      */
368 //    private static final String pattern2string( String pattern ) {
369 //        return pattern2string(pattern, true);
370 //    }
371 
372 //    private static final String pattern2string( String pattern, boolean make_string ) {
373 //        StringBuilder buf = new StringBuilder();
374 //        for (int i = 0; i < pattern.length(); i++) {
375 //            char ch = pattern.charAt(i);
376 //            if (ch >= OFFSET) {
377 //                if (make_string) {
378 //                    buf.append(Constants.getOpcodeName(ch - OFFSET));
379 //                } else {
380 //                    buf.append((ch - OFFSET));
381 //                }
382 //            } else {
383 //                buf.append(ch);
384 //            }
385 //        }
386 //        return buf.toString();
387 //    }
388 }