Threader.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. package org.apache.commons.net.nntp;

  18. import java.util.Arrays;
  19. import java.util.HashMap;
  20. import java.util.List;
  21. import java.util.Map;

  22. /**
  23.  * This is an implementation of a message threading algorithm, as originally devised by Zamie Zawinski.
  24.  * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details.
  25.  * For his Java implementation, see
  26.  * <a href="https://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java">
  27.  * https://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a>
  28.  */
  29. public class Threader {

  30.     /**
  31.      *
  32.      * @param threadable
  33.      * @param idTable
  34.      */
  35.     private void buildContainer(final Threadable threadable, final HashMap<String, NntpThreadContainer> idTable) {
  36.         String id = threadable.messageThreadId();
  37.         NntpThreadContainer container = idTable.get(id);
  38.         int bogusIdCount = 0;

  39.         // A NntpThreadContainer exists for this id already. This should be a forward reference, but may
  40.         // be a duplicate id, in which case we will need to generate a bogus placeholder id
  41.         if (container != null) {
  42.             if (container.threadable != null) { // oops! duplicate ids...
  43.                 bogusIdCount++; // Avoid dead local store warning
  44.                 id = "<Bogus-id:" + bogusIdCount + ">";
  45.                 container = null;
  46.             } else {
  47.                 // The container just contained a forward reference to this message, so let's
  48.                 // fill in the threadable field of the container with this message
  49.                 container.threadable = threadable;
  50.             }
  51.         }

  52.         // No container exists for that message Id. Create one and insert it into the hash table.
  53.         if (container == null) {
  54.             container = new NntpThreadContainer();
  55.             container.threadable = threadable;
  56.             idTable.put(id, container);
  57.         }

  58.         // Iterate through all the references and create ThreadContainers for any references that
  59.         // don't have them.
  60.         NntpThreadContainer parentRef = null;
  61.         {
  62.             final String[] references = threadable.messageThreadReferences();
  63.             for (final String refString : references) {
  64.                 NntpThreadContainer ref = idTable.get(refString);

  65.                 // if this id doesn't have a container, create one
  66.                 if (ref == null) {
  67.                     ref = new NntpThreadContainer();
  68.                     idTable.put(refString, ref);
  69.                 }

  70.                 // Link references together in the order they appear in the References: header,
  71.                 // IF they don't have a parent already &&
  72.                 // IF it will not cause a circular reference
  73.                 if (parentRef != null && ref.parent == null && parentRef != ref && !ref.findChild(parentRef)) {
  74.                     // Link ref into the parent's child list
  75.                     ref.parent = parentRef;
  76.                     ref.next = parentRef.child;
  77.                     parentRef.child = ref;
  78.                 }
  79.                 parentRef = ref;
  80.             }
  81.         }

  82.         // parentRef is now set to the container of the last element in the references field. make that
  83.         // be the parent of this container, unless doing so causes a circular reference
  84.         if (parentRef != null && (parentRef == container || container.findChild(parentRef))) {
  85.             parentRef = null;
  86.         }

  87.         // if it has a parent already, it's because we saw this message in a References: field, and presumed
  88.         // a parent based on the other entries in that field. Now that we have the actual message, we can
  89.         // throw away the old parent and use this new one
  90.         if (container.parent != null) {
  91.             NntpThreadContainer rest, prev;

  92.             for (prev = null, rest = container.parent.child; rest != null; prev = rest, rest = rest.next) {
  93.                 if (rest == container) {
  94.                     break;
  95.                 }
  96.             }

  97.             if (rest == null) {
  98.                 throw new IllegalStateException("Didnt find " + container + " in parent " + container.parent);
  99.             }

  100.             // Unlink this container from the parent's child list
  101.             if (prev == null) {
  102.                 container.parent.child = container.next;
  103.             } else {
  104.                 prev.next = container.next;
  105.             }

  106.             container.next = null;
  107.             container.parent = null;
  108.         }

  109.         // If we have a parent, link container into the parents child list
  110.         if (parentRef != null) {
  111.             container.parent = parentRef;
  112.             container.next = parentRef.child;
  113.             parentRef.child = container;
  114.         }
  115.     }

  116.     /**
  117.      * Find the root set of all existing ThreadContainers
  118.      *
  119.      * @param idTable
  120.      * @return root the NntpThreadContainer representing the root node
  121.      */
  122.     private NntpThreadContainer findRootSet(final HashMap<String, NntpThreadContainer> idTable) {
  123.         final NntpThreadContainer root = new NntpThreadContainer();
  124.         for (final Map.Entry<String, NntpThreadContainer> entry : idTable.entrySet()) {
  125.             final NntpThreadContainer c = entry.getValue();
  126.             if (c.parent == null) {
  127.                 if (c.next != null) {
  128.                     throw new IllegalStateException("c.next is " + c.next.toString());
  129.                 }
  130.                 c.next = root.child;
  131.                 root.child = c;
  132.             }
  133.         }
  134.         return root;
  135.     }

  136.     /**
  137.      * If any two members of the root set have the same subject, merge them. This is to attempt to accomodate messages without References: headers.
  138.      *
  139.      * @param root
  140.      */
  141.     private void gatherSubjects(final NntpThreadContainer root) {

  142.         int count = 0;

  143.         for (NntpThreadContainer c = root.child; c != null; c = c.next) {
  144.             count++;
  145.         }

  146.         // TODO verify this will avoid rehashing
  147.         HashMap<String, NntpThreadContainer> subjectTable = new HashMap<>((int) (count * 1.2), (float) 0.9);
  148.         count = 0;

  149.         for (NntpThreadContainer c = root.child; c != null; c = c.next) {
  150.             Threadable threadable = c.threadable;

  151.             // No threadable? If so, it is a dummy node in the root set.
  152.             // Only root set members may be dummies, and they always have at least 2 kids
  153.             // Take the first kid as representative of the subject
  154.             if (threadable == null) {
  155.                 threadable = c.child.threadable;
  156.             }

  157.             final String subj = threadable.simplifiedSubject();

  158.             if (subj == null || subj.isEmpty()) {
  159.                 continue;
  160.             }

  161.             final NntpThreadContainer old = subjectTable.get(subj);

  162.             // Add this container to the table iff:
  163.             // - There exists no container with this subject
  164.             // - or this is a dummy container and the old one is not - the dummy one is
  165.             // more interesting as a root, so put it in the table instead
  166.             // - The container in the table has a "Re:" version of this subject, and
  167.             // this container has a non-"Re:" version of this subject. The non-"Re:" version
  168.             // is the more interesting of the two.
  169.             if (old == null || c.threadable == null && old.threadable != null
  170.                     || old.threadable != null && old.threadable.subjectIsReply() && c.threadable != null && !c.threadable.subjectIsReply()) {
  171.                 subjectTable.put(subj, c);
  172.                 count++;
  173.             }
  174.         }

  175.         // If the table is empty, we're done
  176.         if (count == 0) {
  177.             return;
  178.         }

  179.         // subjectTable is now populated with one entry for each subject which occurs in the
  180.         // root set. Iterate over the root set, and gather together the difference.
  181.         NntpThreadContainer prev, c, rest;
  182.         for (prev = null, c = root.child, rest = c.next; c != null; prev = c, c = rest, rest = rest == null ? null : rest.next) {
  183.             Threadable threadable = c.threadable;

  184.             // is it a dummy node?
  185.             if (threadable == null) {
  186.                 threadable = c.child.threadable;
  187.             }

  188.             final String subj = threadable.simplifiedSubject();

  189.             // Don't thread together all subjectless messages
  190.             if (subj == null || subj.isEmpty()) {
  191.                 continue;
  192.             }

  193.             final NntpThreadContainer old = subjectTable.get(subj);

  194.             if (old == c) { // That's us
  195.                 continue;
  196.             }

  197.             // We have now found another container in the root set with the same subject
  198.             // Remove the "second" message from the root set
  199.             if (prev == null) {
  200.                 root.child = c.next;
  201.             } else {
  202.                 prev.next = c.next;
  203.             }
  204.             c.next = null;

  205.             if (old.threadable == null && c.threadable == null) {
  206.                 // both dummies - merge them
  207.                 NntpThreadContainer tail;
  208.                 for (tail = old.child; tail != null && tail.next != null; tail = tail.next) {
  209.                     // do nothing
  210.                 }

  211.                 if (tail != null) { // protect against possible NPE
  212.                     tail.next = c.child;
  213.                 }

  214.                 for (tail = c.child; tail != null; tail = tail.next) {
  215.                     tail.parent = old;
  216.                 }

  217.                 c.child = null;
  218.             } else if (old.threadable == null || c.threadable != null && c.threadable.subjectIsReply() && !old.threadable.subjectIsReply()) {
  219.                 // Else if old is empty, or c has "Re:" and old does not ==> make this message a child of old
  220.                 c.parent = old;
  221.                 c.next = old.child;
  222.                 old.child = c;
  223.             } else {
  224.                 // else make the old and new messages be children of a new dummy container.
  225.                 // We create a new container object for old.msg and empty the old container
  226.                 final NntpThreadContainer newc = new NntpThreadContainer();
  227.                 newc.threadable = old.threadable;
  228.                 newc.child = old.child;

  229.                 for (NntpThreadContainer tail = newc.child; tail != null; tail = tail.next) {
  230.                     tail.parent = newc;
  231.                 }

  232.                 old.threadable = null;
  233.                 old.child = null;

  234.                 c.parent = old;
  235.                 newc.parent = old;

  236.                 // Old is now a dummy- give it 2 kids , c and newc
  237.                 old.child = c;
  238.                 c.next = newc;
  239.             }
  240.             // We've done a merge, so keep the same prev
  241.             c = prev;
  242.         }

  243.         subjectTable.clear();
  244.         subjectTable = null;

  245.     }

  246.     /**
  247.      * Delete any empty or dummy ThreadContainers
  248.      *
  249.      * @param parent
  250.      */
  251.     private void pruneEmptyContainers(final NntpThreadContainer parent) {
  252.         NntpThreadContainer container, prev, next;
  253.         for (prev = null, container = parent.child, next = container.next; container != null; prev = container, container = next, next = container == null
  254.                 ? null
  255.                 : container.next) {

  256.             // Is it empty and without any children? If so,delete it
  257.             if (container.threadable == null && container.child == null) {
  258.                 if (prev == null) {
  259.                     parent.child = container.next;
  260.                 } else {
  261.                     prev.next = container.next;
  262.                 }

  263.                 // Set container to prev so that prev keeps its same value the next time through the loop
  264.                 container = prev;
  265.             }

  266.             // Else if empty, with kids, and (not at root or only one kid)
  267.             else if (container.threadable == null && (container.parent != null || container.child.next == null)) {
  268.                 // We have an invalid/expired message with kids. Promote the kids to this level.
  269.                 NntpThreadContainer tail;
  270.                 final NntpThreadContainer kids = container.child;

  271.                 // Remove this container and replace with 'kids'.
  272.                 if (prev == null) {
  273.                     parent.child = kids;
  274.                 } else {
  275.                     prev.next = kids;
  276.                 }

  277.                 // Make each child's parent be this level's parent -> i.e. promote the children.
  278.                 // Make the last child's next point to this container's next
  279.                 // i.e. splice kids into the list in place of container
  280.                 for (tail = kids; tail.next != null; tail = tail.next) {
  281.                     tail.parent = container.parent;
  282.                 }

  283.                 tail.parent = container.parent;
  284.                 tail.next = container.next;

  285.                 // next currently points to the item after the inserted items in the chain - reset that, so we process the newly
  286.                 // promoted items next time round
  287.                 next = kids;

  288.                 // Set container to prev so that prev keeps its same value the next time through the loop
  289.                 container = prev;
  290.             } else if (container.child != null) {
  291.                 // A real message , with kids
  292.                 // Iterate over the children
  293.                 pruneEmptyContainers(container);
  294.             }
  295.         }
  296.     }

  297.     /**
  298.      * The client passes in a list of Iterable objects, and the Threader constructs a connected 'graph' of messages
  299.      *
  300.      * @param messages iterable of messages to thread, must not be empty
  301.      * @return null if messages == null or root.child == null or messages list is empty
  302.      * @since 3.0
  303.      */
  304.     public Threadable thread(final Iterable<? extends Threadable> messages) {
  305.         if (messages == null) {
  306.             return null;
  307.         }

  308.         HashMap<String, NntpThreadContainer> idTable = new HashMap<>();

  309.         // walk through each Threadable element
  310.         for (final Threadable t : messages) {
  311.             if (!t.isDummy()) {
  312.                 buildContainer(t, idTable);
  313.             }
  314.         }

  315.         if (idTable.isEmpty()) {
  316.             return null;
  317.         }

  318.         final NntpThreadContainer root = findRootSet(idTable);
  319.         idTable.clear();
  320.         idTable = null;

  321.         pruneEmptyContainers(root);

  322.         root.reverseChildren();
  323.         gatherSubjects(root);

  324.         if (root.next != null) {
  325.             throw new IllegalStateException("root node has a next:" + root);
  326.         }

  327.         for (NntpThreadContainer r = root.child; r != null; r = r.next) {
  328.             if (r.threadable == null) {
  329.                 r.threadable = r.child.threadable.makeDummy();
  330.             }
  331.         }

  332.         final Threadable result = root.child == null ? null : root.child.threadable;
  333.         root.flush();

  334.         return result;
  335.     }

  336.     /**
  337.      * The client passes in a list of Threadable objects, and the Threader constructs a connected 'graph' of messages
  338.      *
  339.      * @param messages list of messages to thread, must not be empty
  340.      * @return null if messages == null or root.child == null or messages list is empty
  341.      * @since 2.2
  342.      */
  343.     public Threadable thread(final List<? extends Threadable> messages) {
  344.         return thread((Iterable<? extends Threadable>) messages);
  345.     }

  346.     // DEPRECATED METHODS - for API compatibility only - DO NOT USE

  347.     /**
  348.      * The client passes in an array of Threadable objects, and the Threader constructs a connected 'graph' of messages
  349.      *
  350.      * @param messages array of messages to thread, must not be empty
  351.      * @return null if messages == null or root.child == null or messages array is empty
  352.      * @deprecated (2.2) prefer {@link #thread(List)}
  353.      */
  354.     @Deprecated
  355.     public Threadable thread(final Threadable[] messages) {
  356.         if (messages == null) {
  357.             return null;
  358.         }
  359.         return thread(Arrays.asList(messages));
  360.     }

  361. }