001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *   https://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.bcel.verifier.structurals;
020
021import java.util.ArrayList;
022import java.util.HashMap;
023import java.util.HashSet;
024import java.util.List;
025import java.util.Map;
026import java.util.Set;
027
028import org.apache.bcel.generic.ASTORE;
029import org.apache.bcel.generic.ATHROW;
030import org.apache.bcel.generic.BranchInstruction;
031import org.apache.bcel.generic.CodeExceptionGen;
032import org.apache.bcel.generic.GotoInstruction;
033import org.apache.bcel.generic.IndexedInstruction;
034import org.apache.bcel.generic.Instruction;
035import org.apache.bcel.generic.InstructionHandle;
036import org.apache.bcel.generic.JsrInstruction;
037import org.apache.bcel.generic.LocalVariableInstruction;
038import org.apache.bcel.generic.MethodGen;
039import org.apache.bcel.generic.RET;
040import org.apache.bcel.generic.ReturnInstruction;
041import org.apache.bcel.generic.Select;
042import org.apache.bcel.verifier.exc.AssertionViolatedException;
043import org.apache.bcel.verifier.exc.StructuralCodeConstraintException;
044
045/**
046 * Instances of this class contain information about the subroutines found in a code array of a method. This
047 * implementation considers the top-level (the instructions reachable without a JSR or JSR_W starting off from the first
048 * instruction in a code array of a method) being a special subroutine; see getTopLevel() for that. Please note that the
049 * definition of subroutines in the Java Virtual Machine Specification, Second Edition is somewhat incomplete.
050 * Therefore, JustIce uses an own, more rigid notion. Basically, a subroutine is a piece of code that starts at the
051 * target of a JSR of JSR_W instruction and ends at a corresponding RET instruction. Note also that the control flow of
052 * a subroutine may be complex and non-linear; and that subroutines may be nested. JustIce also mandates subroutines not
053 * to be protected by exception handling code (for the sake of control flow predictability). To understand JustIce's
054 * notion of subroutines, please read
055 *
056 * TODO: refer to the paper.
057 *
058 * @see #getTopLevel()
059 */
060public class Subroutines {
061    // Node coloring constants
062    private enum ColourConstants {
063        WHITE, GRAY, BLACK
064    }
065
066    /**
067     * This inner class implements the Subroutine interface.
068     */
069    private final class SubroutineImpl implements Subroutine {
070        /**
071         * UNSET, a symbol for an uninitialized localVariable field. This is used for the "top-level" Subroutine; i.e. no
072         * subroutine.
073         */
074        private static final int UNSET = -1;
075
076        private final SubroutineImpl[] EMPTY_ARRAY = {};
077
078        /**
079         * The Local Variable slot where the first instruction of this subroutine (an ASTORE) stores the JsrInstruction's
080         * ReturnAddress in and the RET of this subroutine operates on.
081         */
082        private int localVariable = UNSET;
083
084        /** The instructions that belong to this subroutine. */
085        private final Set<InstructionHandle> instructions = new HashSet<>(); // Elements: InstructionHandle
086
087        /**
088         * The JSR or JSR_W instructions that define this subroutine by targeting it.
089         */
090        private final Set<InstructionHandle> theJSRs = new HashSet<>();
091
092        /**
093         * The RET instruction that leaves this subroutine.
094         */
095        private InstructionHandle theRET;
096
097        /**
098         * Constructs a new instance.
099         */
100        SubroutineImpl() {
101            // empty
102        }
103
104        /**
105         * Adds a new JSR or JSR_W that has this subroutine as its target.
106         */
107        public void addEnteringJsrInstruction(final InstructionHandle jsrInst) {
108            if (jsrInst == null || !(jsrInst.getInstruction() instanceof JsrInstruction)) {
109                throw new AssertionViolatedException("Expecting JsrInstruction InstructionHandle.");
110            }
111            if (localVariable == UNSET) {
112                throw new AssertionViolatedException("Set the localVariable first!");
113            }
114            // Something is wrong when an ASTORE is targeted that does not operate on the same local variable than the rest of the
115            // JsrInstruction-targets and the RET.
116            // (We don't know out leader here so we cannot check if we're really targeted!)
117            if (localVariable != ((ASTORE) ((JsrInstruction) jsrInst.getInstruction()).getTarget().getInstruction()).getIndex()) {
118                throw new AssertionViolatedException("Setting a wrong JsrInstruction.");
119            }
120            theJSRs.add(jsrInst);
121        }
122
123        /*
124         * Adds an instruction to this subroutine. All instructions must have been added before invoking setLeavingRET().
125         *
126         * @see #setLeavingRET
127         */
128        void addInstruction(final InstructionHandle ih) {
129            if (theRET != null) {
130                throw new AssertionViolatedException("All instructions must have been added before invoking setLeavingRET().");
131            }
132            instructions.add(ih);
133        }
134
135        /*
136         * Refer to the Subroutine interface for documentation.
137         */
138        @Override
139        public boolean contains(final InstructionHandle inst) {
140            return instructions.contains(inst);
141        }
142
143        /*
144         * Satisfies Subroutine.getAccessedLocalIndices().
145         */
146        @Override
147        public int[] getAccessedLocalsIndices() {
148            // TODO: Implement caching.
149            final Set<Integer> acc = new HashSet<>();
150            if (theRET == null && this != getTopLevel()) {
151                throw new AssertionViolatedException("This subroutine object must be built up completely before calculating accessed locals.");
152            }
153            {
154                for (final InstructionHandle ih : instructions) {
155                    // RET is not a LocalVariableInstruction in the current version of BCEL.
156                    if (ih.getInstruction() instanceof LocalVariableInstruction || ih.getInstruction() instanceof RET) {
157                        final int idx = ((IndexedInstruction) ih.getInstruction()).getIndex();
158                        acc.add(Integer.valueOf(idx));
159                        // LONG? DOUBLE?.
160                        try {
161                            // LocalVariableInstruction instances are typed without the need to look into
162                            // the constant pool.
163                            if (ih.getInstruction() instanceof LocalVariableInstruction) {
164                                final int s = ((LocalVariableInstruction) ih.getInstruction()).getType(null).getSize();
165                                if (s == 2) {
166                                    acc.add(Integer.valueOf(idx + 1));
167                                }
168                            }
169                        } catch (final RuntimeException re) {
170                            throw new AssertionViolatedException("BCEL did not like NULL as a ConstantPoolGen object.", re);
171                        }
172                    }
173                }
174            }
175
176            {
177                final int[] ret = new int[acc.size()];
178                int j = -1;
179                for (final Integer accessedLocal : acc) {
180                    j++;
181                    ret[j] = accessedLocal.intValue();
182                }
183                return ret;
184            }
185        }
186
187        /*
188         * Refer to the Subroutine interface for documentation.
189         */
190        @Override
191        public InstructionHandle[] getEnteringJsrInstructions() {
192            if (this == getTopLevel()) {
193                throw new AssertionViolatedException("getLeavingRET() called on top level pseudo-subroutine.");
194            }
195            return theJSRs.toArray(InstructionHandle.EMPTY_ARRAY);
196        }
197
198        /*
199         * Refer to the Subroutine interface for documentation.
200         */
201        @Override
202        public InstructionHandle[] getInstructions() {
203            return instructions.toArray(InstructionHandle.EMPTY_ARRAY);
204        }
205
206        /*
207         * Refer to the Subroutine interface for documentation.
208         */
209        @Override
210        public InstructionHandle getLeavingRET() {
211            if (this == getTopLevel()) {
212                throw new AssertionViolatedException("getLeavingRET() called on top level pseudo-subroutine.");
213            }
214            return theRET;
215        }
216
217        /* Satisfies Subroutine.getRecursivelyAccessedLocalsIndices(). */
218        @Override
219        public int[] getRecursivelyAccessedLocalsIndices() {
220            final Set<Integer> s = new HashSet<>();
221            final int[] lvs = getAccessedLocalsIndices();
222            for (final int lv : lvs) {
223                s.add(Integer.valueOf(lv));
224            }
225            getRecursivelyAccessedLocalsIndicesHelper(s, subSubs());
226            final int[] ret = new int[s.size()];
227            int j = -1;
228            for (final Integer index : s) {
229                j++;
230                ret[j] = index.intValue();
231            }
232            return ret;
233        }
234
235        /**
236         * A recursive helper method for getRecursivelyAccessedLocalsIndices().
237         *
238         * @see #getRecursivelyAccessedLocalsIndices()
239         */
240        private void getRecursivelyAccessedLocalsIndicesHelper(final Set<Integer> set, final Subroutine[] subs) {
241            for (final Subroutine sub : subs) {
242                final int[] lvs = sub.getAccessedLocalsIndices();
243                for (final int lv : lvs) {
244                    set.add(Integer.valueOf(lv));
245                }
246                if (sub.subSubs().length != 0) {
247                    getRecursivelyAccessedLocalsIndicesHelper(set, sub.subSubs());
248                }
249            }
250        }
251
252        /**
253         * Sets the leaving RET instruction. Must be invoked after all instructions are added. Must not be invoked for top-level
254         * 'subroutine'.
255         */
256        void setLeavingRET() {
257            if (localVariable == UNSET) {
258                throw new AssertionViolatedException("setLeavingRET() called for top-level 'subroutine' or forgot to set local variable first.");
259            }
260            InstructionHandle ret = null;
261            for (final InstructionHandle actual : instructions) {
262                if (actual.getInstruction() instanceof RET) {
263                    if (ret != null) {
264                        throw new StructuralCodeConstraintException("Subroutine with more then one RET detected: '" + ret + "' and '" + actual + "'.");
265                    }
266                    ret = actual;
267                }
268            }
269            if (ret == null) {
270                throw new StructuralCodeConstraintException("Subroutine without a RET detected.");
271            }
272            if (((RET) ret.getInstruction()).getIndex() != localVariable) {
273                throw new StructuralCodeConstraintException(
274                    "Subroutine uses '" + ret + "' which does not match the correct local variable '" + localVariable + "'.");
275            }
276            theRET = ret;
277        }
278
279        /*
280         * Sets the local variable slot the ASTORE that is targeted by the JsrInstructions of this subroutine operates on. This
281         * subroutine's RET operates on that same local variable slot, of course.
282         */
283        void setLocalVariable(final int i) {
284            if (localVariable != UNSET) {
285                throw new AssertionViolatedException("localVariable set twice.");
286            }
287            localVariable = i;
288        }
289
290        /*
291         * Satisfies Subroutine.subSubs().
292         */
293        @Override
294        public Subroutine[] subSubs() {
295            final Set<Subroutine> h = new HashSet<>();
296
297            for (final InstructionHandle ih : instructions) {
298                final Instruction inst = ih.getInstruction();
299                if (inst instanceof JsrInstruction) {
300                    final InstructionHandle targ = ((JsrInstruction) inst).getTarget();
301                    h.add(getSubroutine(targ));
302                }
303            }
304            return h.toArray(EMPTY_ARRAY);
305        }
306
307        /**
308         * Returns a String representation of this object, merely for debugging purposes. (Internal) Warning: Verbosity on a
309         * problematic subroutine may cause stack overflow errors due to recursive subSubs() calls. Don't use this, then.
310         */
311        @Override
312        public String toString() {
313            final StringBuilder ret = new StringBuilder();
314            ret.append("Subroutine: Local variable is '").append(localVariable);
315            ret.append("', JSRs are '").append(theJSRs);
316            ret.append("', RET is '").append(theRET);
317            ret.append("', Instructions: '").append(instructions).append("'.");
318
319            ret.append(" Accessed local variable slots: '");
320            int[] alv = getAccessedLocalsIndices();
321            for (final int element : alv) {
322                ret.append(element);
323                ret.append(" ");
324            }
325            ret.append("'.");
326
327            ret.append(" Recursively (via subsub...routines) accessed local variable slots: '");
328            alv = getRecursivelyAccessedLocalsIndices();
329            for (final int element : alv) {
330                ret.append(element);
331                ret.append(" ");
332            }
333            ret.append("'.");
334
335            return ret.toString();
336        }
337
338    } // end Inner Class SubrouteImpl
339
340    /**
341     * A utility method that calculates the successors of a given InstructionHandle <B>in the same subroutine</B>. That
342     * means, a RET does not have any successors as defined here. A JsrInstruction has its physical successor as its
343     * successor (opposed to its target) as defined here.
344     */
345    private static InstructionHandle[] getSuccessors(final InstructionHandle instruction) {
346        final InstructionHandle[] single = new InstructionHandle[1];
347
348        final Instruction inst = instruction.getInstruction();
349
350        // Terminates method normally.
351        // Terminates method abnormally, because JustIce mandates
352        // subroutines not to be protected by exception handlers.
353        if (inst instanceof RET || inst instanceof ReturnInstruction || inst instanceof ATHROW) {
354            return InstructionHandle.EMPTY_ARRAY;
355        }
356
357        // See method comment.
358        if (inst instanceof JsrInstruction) {
359            single[0] = instruction.getNext();
360            return single;
361        }
362
363        if (inst instanceof GotoInstruction) {
364            single[0] = ((GotoInstruction) inst).getTarget();
365            return single;
366        }
367
368        if (inst instanceof BranchInstruction) {
369            if (inst instanceof Select) {
370                // BCEL's getTargets() returns only the non-default targets,
371                // thanks to Eli Tilevich for reporting.
372                final InstructionHandle[] matchTargets = ((Select) inst).getTargets();
373                final InstructionHandle[] ret = new InstructionHandle[matchTargets.length + 1];
374                ret[0] = ((Select) inst).getTarget();
375                System.arraycopy(matchTargets, 0, ret, 1, matchTargets.length);
376                return ret;
377            }
378            final InstructionHandle[] pair = new InstructionHandle[2];
379            pair[0] = instruction.getNext();
380            pair[1] = ((BranchInstruction) inst).getTarget();
381            return pair;
382        }
383
384        // default case: Fall through.
385        single[0] = instruction.getNext();
386        return single;
387    }
388
389    /**
390     * The map containing the subroutines found. Key: InstructionHandle of the leader of the subroutine. Elements:
391     * SubroutineImpl objects.
392     */
393    private final Map<InstructionHandle, Subroutine> subroutines = new HashMap<>();
394
395    /**
396     * This is referring to a special subroutine, namely the top level. This is not really a subroutine but we use it to
397     * distinguish between top level instructions and unreachable instructions.
398     */
399    // CHECKSTYLE:OFF
400    public final Subroutine TOPLEVEL; // TODO can this be made private?
401    // CHECKSTYLE:ON
402
403    /**
404     * Constructs a new instance.
405     *
406     * @param mg A MethodGen object representing method to create the Subroutine objects of. Assumes that JustIce strict
407     *        checks are needed.
408     */
409    public Subroutines(final MethodGen mg) {
410        this(mg, true);
411    }
412
413    /**
414     * Constructs a new instance.
415     *
416     * @param mg A MethodGen object representing method to create the Subroutine objects of.
417     * @param enableJustIceCheck whether to enable additional JustIce checks
418     * @since 6.0
419     */
420    public Subroutines(final MethodGen mg, final boolean enableJustIceCheck) {
421        final InstructionHandle[] all = mg.getInstructionList().getInstructionHandles();
422        final CodeExceptionGen[] handlers = mg.getExceptionHandlers();
423
424        // Define our "Toplevel" fake subroutine.
425        TOPLEVEL = new SubroutineImpl();
426
427        // Calculate "real" subroutines.
428        final Set<InstructionHandle> subLeaders = new HashSet<>(); // Elements: InstructionHandle
429        for (final InstructionHandle element : all) {
430            final Instruction inst = element.getInstruction();
431            if (inst instanceof JsrInstruction) {
432                subLeaders.add(((JsrInstruction) inst).getTarget());
433            }
434        }
435
436        // Build up the database.
437        for (final InstructionHandle astore : subLeaders) {
438            final SubroutineImpl sr = new SubroutineImpl();
439            sr.setLocalVariable(((ASTORE) astore.getInstruction()).getIndex());
440            subroutines.put(astore, sr);
441        }
442
443        // Fake it a bit. We want a virtual "TopLevel" subroutine.
444        subroutines.put(all[0], TOPLEVEL);
445        subLeaders.add(all[0]);
446
447        // Tell the subroutines about their JsrInstructions.
448        // Note that there cannot be a JSR targeting the top-level
449        // since "Jsr 0" is disallowed in Pass 3a.
450        // Instructions shared by a subroutine and the toplevel are
451        // disallowed and checked below, after the BFS.
452        for (final InstructionHandle element : all) {
453            final Instruction inst = element.getInstruction();
454            if (inst instanceof JsrInstruction) {
455                final InstructionHandle leader = ((JsrInstruction) inst).getTarget();
456                ((SubroutineImpl) getSubroutine(leader)).addEnteringJsrInstruction(element);
457            }
458        }
459
460        // Now do a BFS from every subroutine leader to find all the
461        // instructions that belong to a subroutine.
462        // we don't want to assign an instruction to two or more Subroutine objects.
463        final Set<InstructionHandle> instructionsAssigned = new HashSet<>();
464
465        // Graph coloring. Key: InstructionHandle, Value: ColourConstants enum.
466        final Map<InstructionHandle, ColourConstants> colors = new HashMap<>();
467
468        final List<InstructionHandle> qList = new ArrayList<>();
469        for (final InstructionHandle actual : subLeaders) {
470            // Do some BFS with "actual" as the root of the graph.
471            // Init colors
472            for (final InstructionHandle element : all) {
473                colors.put(element, ColourConstants.WHITE);
474            }
475            colors.put(actual, ColourConstants.GRAY);
476            // Init Queue
477
478            qList.clear();
479            qList.add(actual); // add(Obj) adds to the end, remove(0) removes from the start.
480
481            /*
482             * BFS ALGORITHM MODIFICATION: Start out with multiple "root" nodes, as exception handlers are starting points of
483             * top-level code, too. [why top-level? TODO: Refer to the special JustIce notion of subroutines.]
484             */
485            if (actual == all[0]) {
486                for (final CodeExceptionGen handler : handlers) {
487                    colors.put(handler.getHandlerPC(), ColourConstants.GRAY);
488                    qList.add(handler.getHandlerPC());
489                }
490            }
491            /* CONTINUE NORMAL BFS ALGORITHM */
492
493            // Loop until Queue is empty
494            while (!qList.isEmpty()) {
495                final InstructionHandle u = qList.remove(0);
496                final InstructionHandle[] successors = getSuccessors(u);
497                for (final InstructionHandle successor : successors) {
498                    if (colors.get(successor) == ColourConstants.WHITE) {
499                        colors.put(successor, ColourConstants.GRAY);
500                        qList.add(successor);
501                    }
502                }
503                colors.put(u, ColourConstants.BLACK);
504            }
505            // BFS ended above.
506            for (final InstructionHandle element : all) {
507                if (colors.get(element) == ColourConstants.BLACK) {
508                    ((SubroutineImpl) (actual == all[0] ? getTopLevel() : getSubroutine(actual))).addInstruction(element);
509                    if (instructionsAssigned.contains(element)) {
510                        throw new StructuralCodeConstraintException(
511                            "Instruction '" + element + "' is part of more than one subroutine (or of the top level and a subroutine).");
512                    }
513                    instructionsAssigned.add(element);
514                }
515            }
516            if (actual != all[0]) { // If we don't deal with the top-level 'subroutine'
517                ((SubroutineImpl) getSubroutine(actual)).setLeavingRET();
518            }
519        }
520
521        if (enableJustIceCheck) {
522            // Now make sure no instruction of a Subroutine is protected by exception handling code
523            // as is mandated by JustIces notion of subroutines.
524            for (final CodeExceptionGen handler : handlers) {
525                InstructionHandle protectedIh = handler.getStartPC();
526                while (protectedIh != handler.getEndPC().getNext()) {
527                    // Note the inclusive/inclusive notation of "generic API" exception handlers!
528                    for (final Subroutine sub : subroutines.values()) {
529                        if (sub != subroutines.get(all[0]) && sub.contains(protectedIh)) {
530                            throw new StructuralCodeConstraintException("Subroutine instruction '" + protectedIh + "' is protected by an exception handler, '"
531                                + handler + "'. This is forbidden by the JustIce verifier due to its clear definition of subroutines.");
532                        }
533                    }
534                    protectedIh = protectedIh.getNext();
535                }
536            }
537        }
538
539        // Now make sure no subroutine is calling a subroutine
540        // that uses the same local variable for the RET as themselves
541        // (recursively).
542        // This includes that subroutines may not call themselves
543        // recursively, even not through intermediate calls to other
544        // subroutines.
545        noRecursiveCalls(getTopLevel(), new HashSet<>());
546
547    }
548
549    /**
550     * Returns the Subroutine object associated with the given leader (that is, the first instruction of the subroutine).
551     * You must not use this to get the top-level instructions modeled as a Subroutine object.
552     *
553     * @see #getTopLevel()
554     */
555    public Subroutine getSubroutine(final InstructionHandle leader) {
556        final Subroutine ret = subroutines.get(leader);
557
558        if (ret == null) {
559            throw new AssertionViolatedException("Subroutine requested for an InstructionHandle that is not a leader of a subroutine.");
560        }
561
562        if (ret == TOPLEVEL) {
563            throw new AssertionViolatedException("TOPLEVEL special subroutine requested; use getTopLevel().");
564        }
565
566        return ret;
567    }
568
569    /**
570     * For easy handling, the piece of code that is <B>not</B> a subroutine, the top-level, is also modeled as a Subroutine
571     * object. It is a special Subroutine object where <B>you must not invoke getEnteringJsrInstructions() or
572     * getLeavingRET()</B>.
573     *
574     * @see Subroutine#getEnteringJsrInstructions()
575     * @see Subroutine#getLeavingRET()
576     */
577    public Subroutine getTopLevel() {
578        return TOPLEVEL;
579    }
580
581    /**
582     * This (recursive) utility method makes sure that no subroutine is calling a subroutine that uses the same local
583     * variable for the RET as themselves (recursively). This includes that subroutines may not call themselves recursively,
584     * even not through intermediate calls to other subroutines.
585     *
586     * @throws StructuralCodeConstraintException if the above constraint is not satisfied.
587     */
588    private void noRecursiveCalls(final Subroutine sub, final Set<Integer> set) {
589        final Subroutine[] subs = sub.subSubs();
590
591        for (final Subroutine sub2 : subs) {
592            final int index = ((RET) sub2.getLeavingRET().getInstruction()).getIndex();
593
594            if (!set.add(Integer.valueOf(index))) {
595                // Don't use toString() here because of possibly infinite recursive subSubs() calls then.
596                final SubroutineImpl si = (SubroutineImpl) sub2;
597                throw new StructuralCodeConstraintException("Subroutine with local variable '" + si.localVariable + "', JSRs '" + si.theJSRs + "', RET '"
598                    + si.theRET + "' is called by a subroutine which uses the same local variable index as itself; maybe even a recursive call?"
599                    + " JustIce's clean definition of a subroutine forbids both.");
600            }
601
602            noRecursiveCalls(sub2, set);
603
604            set.remove(Integer.valueOf(index));
605        }
606    }
607
608    /**
609     * Returns the subroutine object associated with the given instruction. This is a costly operation, you should consider
610     * using getSubroutine(InstructionHandle). Returns 'null' if the given InstructionHandle lies in so-called 'dead code',
611     * i.e. code that can never be executed.
612     *
613     * @see #getSubroutine(InstructionHandle)
614     * @see #getTopLevel()
615     */
616    public Subroutine subroutineOf(final InstructionHandle any) {
617        for (final Subroutine s : subroutines.values()) {
618            if (s.contains(any)) {
619                return s;
620            }
621        }
622        System.err.println("DEBUG: Please verify '" + any.toString(true) + "' lies in dead code.");
623        return null;
624        // throw new AssertionViolatedException("No subroutine for InstructionHandle found (DEAD CODE?).");
625    }
626
627    /**
628     * Returns a String representation of this object; merely for debugging puposes.
629     */
630    @Override
631    public String toString() {
632        return "---\n" + subroutines + "\n---\n";
633    }
634}