View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   *  Unless required by applicable law or agreed to in writing, software
12   *  distributed under the License is distributed on an "AS IS" BASIS,
13   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   *  See the License for the specific language governing permissions and
15   *  limitations under the License. 
16   *
17   */
18  package org.apache.bcel.util;
19  
20  import java.util.ArrayList;
21  import java.util.HashMap;
22  import java.util.Iterator;
23  import java.util.List;
24  import java.util.Locale;
25  import java.util.Map;
26  import java.util.regex.Matcher;
27  import java.util.regex.Pattern;
28  
29  import org.apache.bcel.Constants;
30  import org.apache.bcel.generic.ClassGenException;
31  import org.apache.bcel.generic.Instruction;
32  import org.apache.bcel.generic.InstructionHandle;
33  import org.apache.bcel.generic.InstructionList;
34  
35  /**
36   * InstructionFinder is a tool to search for given instruction patterns, i.e.,
37   * to match sequences of instructions in an instruction list via regular
38   * expressions. This can be used, e.g., in order to implement a peephole
39   * optimizer that looks for code patterns and replaces them with faster
40   * equivalents.
41   * 
42   * <p>
43   * This class internally uses the java.util.regex
44   * package to search for regular expressions.
45   * 
46   * A typical application would look like this:
47   * 
48   * <pre>
49   * 
50   *  
51   *   InstructionFinder f   = new InstructionFinder(il);
52   *   String            pat = &quot;IfInstruction ICONST_0 GOTO ICONST_1 NOP (IFEQ|IFNE)&quot;;
53   *   
54   *   for(Iterator i = f.search(pat, constraint); i.hasNext(); ) {
55   *   InstructionHandle[] match = (InstructionHandle[])i.next();
56   *   ...
57   *   il.delete(match[1], match[5]);
58   *   ...
59   *   }
60   *   
61   *  
62   * </pre>
63   * 
64   * @version $Id: InstructionFinder.html 898356 2014-02-18 05:44:40Z ggregory $
65   * @author <A HREF="mailto:m.dahm@gmx.de">M. Dahm</A>
66   * @see Instruction
67   * @see InstructionList
68   */
69  public class InstructionFinder {
70  
71      private static final int OFFSET = 32767; // char + OFFSET is
72      // outside of
73      // LATIN-1
74      private static final int NO_OPCODES = 256; // Potential number,
75      // some are not used
76      private static final Map<String, String> map = new HashMap<String, String>();
77      private InstructionList il;
78      private String il_string; // instruction list
79      // as string
80      private InstructionHandle[] handles; // map instruction
81  
82  
83      // list to array
84      /**
85       * @param il
86       *          instruction list to search for given patterns
87       */
88      public InstructionFinder(InstructionList il) {
89          this.il = il;
90          reread();
91      }
92  
93  
94      /**
95       * Reread the instruction list, e.g., after you've altered the list upon a
96       * match.
97       */
98      public final void reread() {
99          int size = il.getLength();
100         char[] buf = new char[size]; // Create a string with length equal to il
101         // length
102         handles = il.getInstructionHandles();
103         // Map opcodes to characters
104         for (int i = 0; i < size; i++) {
105             buf[i] = makeChar(handles[i].getInstruction().getOpcode());
106         }
107         il_string = new String(buf);
108     }
109 
110 
111     /**
112      * Map symbolic instruction names like "getfield" to a single character.
113      * 
114      * @param pattern
115      *          instruction pattern in lower case
116      * @return encoded string for a pattern such as "BranchInstruction".
117      */
118     private static final String mapName( String pattern ) {
119         String result = map.get(pattern);
120         if (result != null) {
121             return result;
122         }
123         for (short i = 0; i < NO_OPCODES; i++) {
124             if (pattern.equals(Constants.OPCODE_NAMES[i])) {
125                 return "" + makeChar(i);
126             }
127         }
128         throw new RuntimeException("Instruction unknown: " + pattern);
129     }
130 
131 
132     /**
133      * Replace symbolic names of instructions with the appropiate character and
134      * remove all white space from string. Meta characters such as +, * are
135      * ignored.
136      * 
137      * @param pattern
138      *          The pattern to compile
139      * @return translated regular expression string
140      */
141     private static final String compilePattern( String pattern ) {
142         //Bug: 38787 - Instructions are assumed to be english, to avoid odd Locale issues
143         String lower = pattern.toLowerCase(Locale.ENGLISH);
144         StringBuilder buf = new StringBuilder();
145         int size = pattern.length();
146         for (int i = 0; i < size; i++) {
147             char ch = lower.charAt(i);
148             if (Character.isLetterOrDigit(ch)) {
149                 StringBuilder name = new StringBuilder();
150                 while ((Character.isLetterOrDigit(ch) || ch == '_') && i < size) {
151                     name.append(ch);
152                     if (++i < size) {
153                         ch = lower.charAt(i);
154                     } else {
155                         break;
156                     }
157                 }
158                 i--;
159                 buf.append(mapName(name.toString()));
160             } else if (!Character.isWhitespace(ch)) {
161                 buf.append(ch);
162             }
163         }
164         return buf.toString();
165     }
166 
167 
168     /**
169      * @return the matched piece of code as an array of instruction (handles)
170      */
171     private InstructionHandle[] getMatch( int matched_from, int match_length ) {
172         InstructionHandle[] match = new InstructionHandle[match_length];
173         System.arraycopy(handles, matched_from, match, 0, match_length);
174         return match;
175     }
176 
177 
178     /**
179      * Search for the given pattern in the instruction list. You can search for
180      * any valid opcode via its symbolic name, e.g. "istore". You can also use a
181      * super class or an interface name to match a whole set of instructions, e.g.
182      * "BranchInstruction" or "LoadInstruction". "istore" is also an alias for all
183      * "istore_x" instructions. Additional aliases are "if" for "ifxx", "if_icmp"
184      * for "if_icmpxx", "if_acmp" for "if_acmpxx".
185      * 
186      * Consecutive instruction names must be separated by white space which will
187      * be removed during the compilation of the pattern.
188      * 
189      * For the rest the usual pattern matching rules for regular expressions
190      * apply.
191      * <P>
192      * Example pattern:
193      * 
194      * <pre>
195      * search(&quot;BranchInstruction NOP ((IfInstruction|GOTO)+ ISTORE Instruction)*&quot;);
196      * </pre>
197      * 
198      * <p>
199      * If you alter the instruction list upon a match such that other matching
200      * areas are affected, you should call reread() to update the finder and call
201      * search() again, because the matches are cached.
202      * 
203      * @param pattern
204      *          the instruction pattern to search for, where case is ignored
205      * @param from
206      *          where to start the search in the instruction list
207      * @param constraint
208      *          optional CodeConstraint to check the found code pattern for
209      *          user-defined constraints
210      * @return iterator of matches where e.nextElement() returns an array of
211      *         instruction handles describing the matched area
212      */
213     public final Iterator<InstructionHandle[]> search( String pattern, InstructionHandle from, CodeConstraint constraint ) {
214         String search = compilePattern(pattern);
215         int start = -1;
216         for (int i = 0; i < handles.length; i++) {
217             if (handles[i] == from) {
218                 start = i; // Where to start search from (index)
219                 break;
220             }
221         }
222         if (start == -1) {
223             throw new ClassGenException("Instruction handle " + from
224                     + " not found in instruction list.");
225         }
226         Pattern regex = Pattern.compile(search);
227         List<InstructionHandle[]> matches = new ArrayList<InstructionHandle[]>();
228         Matcher matcher = regex.matcher(il_string);
229         while (start < il_string.length() && matcher.find(start)) {
230             int startExpr = matcher.start();
231             int endExpr = matcher.end();
232             int lenExpr = (endExpr - startExpr);
233             InstructionHandle[] match = getMatch(startExpr, lenExpr);
234             if ((constraint == null) || constraint.checkCode(match)) {
235                 matches.add(match);
236             }
237             start = endExpr;
238         }
239         return matches.iterator();
240     }
241 
242 
243     /**
244      * Start search beginning from the start of the given instruction list.
245      * 
246      * @param pattern
247      *          the instruction pattern to search for, where case is ignored
248      * @return iterator of matches where e.nextElement() returns an array of
249      *         instruction handles describing the matched area
250      */
251     public final Iterator<InstructionHandle[]> search( String pattern ) {
252         return search(pattern, il.getStart(), null);
253     }
254 
255 
256     /**
257      * Start search beginning from `from'.
258      * 
259      * @param pattern
260      *          the instruction pattern to search for, where case is ignored
261      * @param from
262      *          where to start the search in the instruction list
263      * @return iterator of matches where e.nextElement() returns an array of
264      *         instruction handles describing the matched area
265      */
266     public final Iterator<InstructionHandle[]> search( String pattern, InstructionHandle from ) {
267         return search(pattern, from, null);
268     }
269 
270 
271     /**
272      * Start search beginning from the start of the given instruction list. Check
273      * found matches with the constraint object.
274      * 
275      * @param pattern
276      *          the instruction pattern to search for, case is ignored
277      * @param constraint
278      *          constraints to be checked on matching code
279      * @return instruction handle or `null' if the match failed
280      */
281     public final Iterator<InstructionHandle[]> search( String pattern, CodeConstraint constraint ) {
282         return search(pattern, il.getStart(), constraint);
283     }
284 
285 
286     /**
287      * Convert opcode number to char.
288      */
289     private static final char makeChar( short opcode ) {
290         return (char) (opcode + OFFSET);
291     }
292 
293 
294     /**
295      * @return the inquired instruction list
296      */
297     public final InstructionList getInstructionList() {
298         return il;
299     }
300 
301     /**
302      * Code patterns found may be checked using an additional user-defined
303      * constraint object whether they really match the needed criterion. I.e.,
304      * check constraints that can not expressed with regular expressions.
305      * 
306      */
307     public static interface CodeConstraint {
308 
309         /**
310          * @param match
311          *          array of instructions matching the requested pattern
312          * @return true if the matched area is really useful
313          */
314         public boolean checkCode( InstructionHandle[] match );
315     }
316 
317     // Initialize pattern map
318     static {
319         map.put("arithmeticinstruction","(irem|lrem|iand|ior|ineg|isub|lneg|fneg|fmul|ldiv|fadd|lxor|frem|idiv|land|ixor|ishr|fsub|lshl|fdiv|iadd|lor|dmul|lsub|ishl|imul|lmul|lushr|dneg|iushr|lshr|ddiv|drem|dadd|ladd|dsub)");
320 		map.put("invokeinstruction", "(invokevirtual|invokeinterface|invokestatic|invokespecial)");
321 		map.put("arrayinstruction", "(baload|aastore|saload|caload|fastore|lastore|iaload|castore|iastore|aaload|bastore|sastore|faload|laload|daload|dastore)");
322 		map.put("gotoinstruction", "(goto|goto_w)");
323 		map.put("conversioninstruction", "(d2l|l2d|i2s|d2i|l2i|i2b|l2f|d2f|f2i|i2d|i2l|f2d|i2c|f2l|i2f)");
324 		map.put("localvariableinstruction","(fstore|iinc|lload|dstore|dload|iload|aload|astore|istore|fload|lstore)");
325 		map.put("loadinstruction", "(fload|dload|lload|iload|aload)");
326 		map.put("fieldinstruction", "(getfield|putstatic|getstatic|putfield)");
327 		map.put("cpinstruction", "(ldc2_w|invokeinterface|multianewarray|putstatic|instanceof|getstatic|checkcast|getfield|invokespecial|ldc_w|invokestatic|invokevirtual|putfield|ldc|new|anewarray)");
328 		map.put("stackinstruction", "(dup2|swap|dup2_x2|pop|pop2|dup|dup2_x1|dup_x2|dup_x1)");
329 		map.put("branchinstruction", "(ifle|if_acmpne|if_icmpeq|if_acmpeq|ifnonnull|goto_w|iflt|ifnull|if_icmpne|tableswitch|if_icmple|ifeq|if_icmplt|jsr_w|if_icmpgt|ifgt|jsr|goto|ifne|ifge|lookupswitch|if_icmpge)");
330 		map.put("returninstruction", "(lreturn|ireturn|freturn|dreturn|areturn|return)");
331 		map.put("storeinstruction", "(istore|fstore|dstore|astore|lstore)");
332 		map.put("select", "(tableswitch|lookupswitch)");
333 		map.put("ifinstruction", "(ifeq|ifgt|if_icmpne|if_icmpeq|ifge|ifnull|ifne|if_icmple|if_icmpge|if_acmpeq|if_icmplt|if_acmpne|ifnonnull|iflt|if_icmpgt|ifle)");
334 		map.put("jsrinstruction", "(jsr|jsr_w)");
335 		map.put("variablelengthinstruction", "(tableswitch|jsr|goto|lookupswitch)");
336 		map.put("unconditionalbranch", "(goto|jsr|jsr_w|athrow|goto_w)");
337 		map.put("constantpushinstruction", "(dconst|bipush|sipush|fconst|iconst|lconst)");
338 		map.put("typedinstruction", "(imul|lsub|aload|fload|lor|new|aaload|fcmpg|iand|iaload|lrem|idiv|d2l|isub|dcmpg|dastore|ret|f2d|f2i|drem|iinc|i2c|checkcast|frem|lreturn|astore|lushr|daload|dneg|fastore|istore|lshl|ldiv|lstore|areturn|ishr|ldc_w|invokeinterface|aastore|lxor|ishl|l2d|i2f|return|faload|sipush|iushr|caload|instanceof|invokespecial|putfield|fmul|ireturn|laload|d2f|lneg|ixor|i2l|fdiv|lastore|multianewarray|i2b|getstatic|i2d|putstatic|fcmpl|saload|ladd|irem|dload|jsr_w|dconst|dcmpl|fsub|freturn|ldc|aconst_null|castore|lmul|ldc2_w|dadd|iconst|f2l|ddiv|dstore|land|jsr|anewarray|dmul|bipush|dsub|sastore|d2i|i2s|lshr|iadd|l2i|lload|bastore|fstore|fneg|iload|fadd|baload|fconst|ior|ineg|dreturn|l2f|lconst|getfield|invokevirtual|invokestatic|iastore)");
339 		map.put("popinstruction", "(fstore|dstore|pop|pop2|astore|putstatic|istore|lstore)");
340 		map.put("allocationinstruction", "(multianewarray|new|anewarray|newarray)");
341 		map.put("indexedinstruction", "(lload|lstore|fload|ldc2_w|invokeinterface|multianewarray|astore|dload|putstatic|instanceof|getstatic|checkcast|getfield|invokespecial|dstore|istore|iinc|ldc_w|ret|fstore|invokestatic|iload|putfield|invokevirtual|ldc|new|aload|anewarray)");
342 		map.put("pushinstruction", "(dup|lload|dup2|bipush|fload|ldc2_w|sipush|lconst|fconst|dload|getstatic|ldc_w|aconst_null|dconst|iload|ldc|iconst|aload)");
343 		map.put("stackproducer", "(imul|lsub|aload|fload|lor|new|aaload|fcmpg|iand|iaload|lrem|idiv|d2l|isub|dcmpg|dup|f2d|f2i|drem|i2c|checkcast|frem|lushr|daload|dneg|lshl|ldiv|ishr|ldc_w|invokeinterface|lxor|ishl|l2d|i2f|faload|sipush|iushr|caload|instanceof|invokespecial|fmul|laload|d2f|lneg|ixor|i2l|fdiv|getstatic|i2b|swap|i2d|dup2|fcmpl|saload|ladd|irem|dload|jsr_w|dconst|dcmpl|fsub|ldc|arraylength|aconst_null|tableswitch|lmul|ldc2_w|iconst|dadd|f2l|ddiv|land|jsr|anewarray|dmul|bipush|dsub|d2i|newarray|i2s|lshr|iadd|lload|l2i|fneg|iload|fadd|baload|fconst|lookupswitch|ior|ineg|lconst|l2f|getfield|invokevirtual|invokestatic)");
344 		map.put("stackconsumer", "(imul|lsub|lor|iflt|fcmpg|if_icmpgt|iand|ifeq|if_icmplt|lrem|ifnonnull|idiv|d2l|isub|dcmpg|dastore|if_icmpeq|f2d|f2i|drem|i2c|checkcast|frem|lreturn|astore|lushr|pop2|monitorexit|dneg|fastore|istore|lshl|ldiv|lstore|areturn|if_icmpge|ishr|monitorenter|invokeinterface|aastore|lxor|ishl|l2d|i2f|return|iushr|instanceof|invokespecial|fmul|ireturn|d2f|lneg|ixor|pop|i2l|ifnull|fdiv|lastore|i2b|if_acmpeq|ifge|swap|i2d|putstatic|fcmpl|ladd|irem|dcmpl|fsub|freturn|ifgt|castore|lmul|dadd|f2l|ddiv|dstore|land|if_icmpne|if_acmpne|dmul|dsub|sastore|ifle|d2i|i2s|lshr|iadd|l2i|bastore|fstore|fneg|fadd|ior|ineg|ifne|dreturn|l2f|if_icmple|getfield|invokevirtual|invokestatic|iastore)");
345 		map.put("exceptionthrower","(irem|lrem|laload|putstatic|baload|dastore|areturn|getstatic|ldiv|anewarray|iastore|castore|idiv|saload|lastore|fastore|putfield|lreturn|caload|getfield|return|aastore|freturn|newarray|instanceof|multianewarray|athrow|faload|iaload|aaload|dreturn|monitorenter|checkcast|bastore|arraylength|new|invokevirtual|sastore|ldc_w|ireturn|invokespecial|monitorexit|invokeinterface|ldc|invokestatic|daload)");
346 		map.put("loadclass", "(multianewarray|invokeinterface|instanceof|invokespecial|putfield|checkcast|putstatic|invokevirtual|new|getstatic|invokestatic|getfield|anewarray)");
347 		map.put("instructiontargeter", "(ifle|if_acmpne|if_icmpeq|if_acmpeq|ifnonnull|goto_w|iflt|ifnull|if_icmpne|tableswitch|if_icmple|ifeq|if_icmplt|jsr_w|if_icmpgt|ifgt|jsr|goto|ifne|ifge|lookupswitch|if_icmpge)");
348 		// Some aliases
349 		map.put("if_icmp", "(if_icmpne|if_icmpeq|if_icmple|if_icmpge|if_icmplt|if_icmpgt)");
350 		map.put("if_acmp", "(if_acmpeq|if_acmpne)");
351 		map.put("if", "(ifeq|ifne|iflt|ifge|ifgt|ifle)");
352 		// Precompile some aliases first
353 		map.put("iconst", precompile(Constants.ICONST_0, Constants.ICONST_5, Constants.ICONST_M1));
354 		map.put("lconst", new String(new char[] { '(', makeChar(Constants.LCONST_0), '|', makeChar(Constants.LCONST_1), ')' }));
355 		map.put("dconst", new String(new char[] { '(', makeChar(Constants.DCONST_0), '|', makeChar(Constants.DCONST_1), ')' }));
356 		map.put("fconst", new String(new char[] { '(', makeChar(Constants.FCONST_0), '|', makeChar(Constants.FCONST_1), ')' }));
357 		map.put("iload", precompile(Constants.ILOAD_0, Constants.ILOAD_3, Constants.ILOAD));
358 		map.put("dload", precompile(Constants.DLOAD_0, Constants.DLOAD_3, Constants.DLOAD));
359 		map.put("fload", precompile(Constants.FLOAD_0, Constants.FLOAD_3, Constants.FLOAD));
360 		map.put("aload", precompile(Constants.ALOAD_0, Constants.ALOAD_3, Constants.ALOAD));
361 		map.put("istore", precompile(Constants.ISTORE_0, Constants.ISTORE_3, Constants.ISTORE));
362 		map.put("dstore", precompile(Constants.DSTORE_0, Constants.DSTORE_3, Constants.DSTORE));
363 		map.put("fstore", precompile(Constants.FSTORE_0, Constants.FSTORE_3, Constants.FSTORE));
364 		map.put("astore", precompile(Constants.ASTORE_0, Constants.ASTORE_3, Constants.ASTORE));
365 		// Compile strings
366 		for (String key : map.keySet()) {
367 			String value = map.get(key);
368 			char ch = value.charAt(1); // Omit already precompiled patterns
369 			if (ch < OFFSET) {
370 				map.put(key, compilePattern(value)); // precompile all
371 														// patterns
372 			}
373 		}
374 		// Add instruction alias to match anything
375 		StringBuilder buf = new StringBuilder("(");
376 		for (short i = 0; i < NO_OPCODES; i++) {
377 			if (Constants.NO_OF_OPERANDS[i] != Constants.UNDEFINED) { // Not
378 																		// an
379 				// invalid
380 				// opcode
381 				buf.append(makeChar(i));
382 				if (i < NO_OPCODES - 1) {
383 					buf.append('|');
384 				}
385 			}
386 		}
387 		buf.append(')');
388 		map.put("instruction", buf.toString());
389     }
390 
391 
392     private static String precompile( short from, short to, short extra ) {
393         StringBuilder buf = new StringBuilder("(");
394         for (short i = from; i <= to; i++) {
395             buf.append(makeChar(i));
396             buf.append('|');
397         }
398         buf.append(makeChar(extra));
399         buf.append(")");
400         return buf.toString();
401     }
402 
403 
404     /*
405 	 * Internal debugging routines.
406 	 */
407 //    private static final String pattern2string( String pattern ) {
408 //        return pattern2string(pattern, true);
409 //    }
410 
411 
412 //    private static final String pattern2string( String pattern, boolean make_string ) {
413 //        StringBuffer buf = new StringBuffer();
414 //        for (int i = 0; i < pattern.length(); i++) {
415 //            char ch = pattern.charAt(i);
416 //            if (ch >= OFFSET) {
417 //                if (make_string) {
418 //                    buf.append(Constants.OPCODE_NAMES[ch - OFFSET]);
419 //                } else {
420 //                    buf.append((ch - OFFSET));
421 //                }
422 //            } else {
423 //                buf.append(ch);
424 //            }
425 //        }
426 //        return buf.toString();
427 //    }
428 }