Threader.java
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package org.apache.commons.net.nntp;
- import java.util.Arrays;
- import java.util.HashMap;
- import java.util.List;
- import java.util.Map;
- /**
- * This is an implementation of a message threading algorithm, as originally devised by Zamie Zawinski.
- * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details.
- * For his Java implementation, see
- * <a href="https://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java">
- * https://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a>
- */
- public class Threader {
- /**
- *
- * @param threadable
- * @param idTable
- */
- private void buildContainer(final Threadable threadable, final HashMap<String, NntpThreadContainer> idTable) {
- String id = threadable.messageThreadId();
- NntpThreadContainer container = idTable.get(id);
- int bogusIdCount = 0;
- // A NntpThreadContainer exists for this id already. This should be a forward reference, but may
- // be a duplicate id, in which case we will need to generate a bogus placeholder id
- if (container != null) {
- if (container.threadable != null) { // oops! duplicate ids...
- bogusIdCount++; // Avoid dead local store warning
- id = "<Bogus-id:" + bogusIdCount + ">";
- container = null;
- } else {
- // The container just contained a forward reference to this message, so let's
- // fill in the threadable field of the container with this message
- container.threadable = threadable;
- }
- }
- // No container exists for that message Id. Create one and insert it into the hash table.
- if (container == null) {
- container = new NntpThreadContainer();
- container.threadable = threadable;
- idTable.put(id, container);
- }
- // Iterate through all the references and create ThreadContainers for any references that
- // don't have them.
- NntpThreadContainer parentRef = null;
- {
- final String[] references = threadable.messageThreadReferences();
- for (final String refString : references) {
- NntpThreadContainer ref = idTable.get(refString);
- // if this id doesn't have a container, create one
- if (ref == null) {
- ref = new NntpThreadContainer();
- idTable.put(refString, ref);
- }
- // Link references together in the order they appear in the References: header,
- // IF they don't have a parent already &&
- // IF it will not cause a circular reference
- if (parentRef != null && ref.parent == null && parentRef != ref && !ref.findChild(parentRef)) {
- // Link ref into the parent's child list
- ref.parent = parentRef;
- ref.next = parentRef.child;
- parentRef.child = ref;
- }
- parentRef = ref;
- }
- }
- // parentRef is now set to the container of the last element in the references field. make that
- // be the parent of this container, unless doing so causes a circular reference
- if (parentRef != null && (parentRef == container || container.findChild(parentRef))) {
- parentRef = null;
- }
- // if it has a parent already, it's because we saw this message in a References: field, and presumed
- // a parent based on the other entries in that field. Now that we have the actual message, we can
- // throw away the old parent and use this new one
- if (container.parent != null) {
- NntpThreadContainer rest, prev;
- for (prev = null, rest = container.parent.child; rest != null; prev = rest, rest = rest.next) {
- if (rest == container) {
- break;
- }
- }
- if (rest == null) {
- throw new IllegalStateException("Didnt find " + container + " in parent " + container.parent);
- }
- // Unlink this container from the parent's child list
- if (prev == null) {
- container.parent.child = container.next;
- } else {
- prev.next = container.next;
- }
- container.next = null;
- container.parent = null;
- }
- // If we have a parent, link container into the parents child list
- if (parentRef != null) {
- container.parent = parentRef;
- container.next = parentRef.child;
- parentRef.child = container;
- }
- }
- /**
- * Find the root set of all existing ThreadContainers
- *
- * @param idTable
- * @return root the NntpThreadContainer representing the root node
- */
- private NntpThreadContainer findRootSet(final HashMap<String, NntpThreadContainer> idTable) {
- final NntpThreadContainer root = new NntpThreadContainer();
- for (final Map.Entry<String, NntpThreadContainer> entry : idTable.entrySet()) {
- final NntpThreadContainer c = entry.getValue();
- if (c.parent == null) {
- if (c.next != null) {
- throw new IllegalStateException("c.next is " + c.next.toString());
- }
- c.next = root.child;
- root.child = c;
- }
- }
- return root;
- }
- /**
- * If any two members of the root set have the same subject, merge them. This is to attempt to accomodate messages without References: headers.
- *
- * @param root
- */
- private void gatherSubjects(final NntpThreadContainer root) {
- int count = 0;
- for (NntpThreadContainer c = root.child; c != null; c = c.next) {
- count++;
- }
- // TODO verify this will avoid rehashing
- HashMap<String, NntpThreadContainer> subjectTable = new HashMap<>((int) (count * 1.2), (float) 0.9);
- count = 0;
- for (NntpThreadContainer c = root.child; c != null; c = c.next) {
- Threadable threadable = c.threadable;
- // No threadable? If so, it is a dummy node in the root set.
- // Only root set members may be dummies, and they always have at least 2 kids
- // Take the first kid as representative of the subject
- if (threadable == null) {
- threadable = c.child.threadable;
- }
- final String subj = threadable.simplifiedSubject();
- if (subj == null || subj.isEmpty()) {
- continue;
- }
- final NntpThreadContainer old = subjectTable.get(subj);
- // Add this container to the table iff:
- // - There exists no container with this subject
- // - or this is a dummy container and the old one is not - the dummy one is
- // more interesting as a root, so put it in the table instead
- // - The container in the table has a "Re:" version of this subject, and
- // this container has a non-"Re:" version of this subject. The non-"Re:" version
- // is the more interesting of the two.
- if (old == null || c.threadable == null && old.threadable != null
- || old.threadable != null && old.threadable.subjectIsReply() && c.threadable != null && !c.threadable.subjectIsReply()) {
- subjectTable.put(subj, c);
- count++;
- }
- }
- // If the table is empty, we're done
- if (count == 0) {
- return;
- }
- // subjectTable is now populated with one entry for each subject which occurs in the
- // root set. Iterate over the root set, and gather together the difference.
- NntpThreadContainer prev, c, rest;
- for (prev = null, c = root.child, rest = c.next; c != null; prev = c, c = rest, rest = rest == null ? null : rest.next) {
- Threadable threadable = c.threadable;
- // is it a dummy node?
- if (threadable == null) {
- threadable = c.child.threadable;
- }
- final String subj = threadable.simplifiedSubject();
- // Don't thread together all subjectless messages
- if (subj == null || subj.isEmpty()) {
- continue;
- }
- final NntpThreadContainer old = subjectTable.get(subj);
- if (old == c) { // That's us
- continue;
- }
- // We have now found another container in the root set with the same subject
- // Remove the "second" message from the root set
- if (prev == null) {
- root.child = c.next;
- } else {
- prev.next = c.next;
- }
- c.next = null;
- if (old.threadable == null && c.threadable == null) {
- // both dummies - merge them
- NntpThreadContainer tail;
- for (tail = old.child; tail != null && tail.next != null; tail = tail.next) {
- // do nothing
- }
- if (tail != null) { // protect against possible NPE
- tail.next = c.child;
- }
- for (tail = c.child; tail != null; tail = tail.next) {
- tail.parent = old;
- }
- c.child = null;
- } else if (old.threadable == null || c.threadable != null && c.threadable.subjectIsReply() && !old.threadable.subjectIsReply()) {
- // Else if old is empty, or c has "Re:" and old does not ==> make this message a child of old
- c.parent = old;
- c.next = old.child;
- old.child = c;
- } else {
- // else make the old and new messages be children of a new dummy container.
- // We create a new container object for old.msg and empty the old container
- final NntpThreadContainer newc = new NntpThreadContainer();
- newc.threadable = old.threadable;
- newc.child = old.child;
- for (NntpThreadContainer tail = newc.child; tail != null; tail = tail.next) {
- tail.parent = newc;
- }
- old.threadable = null;
- old.child = null;
- c.parent = old;
- newc.parent = old;
- // Old is now a dummy- give it 2 kids , c and newc
- old.child = c;
- c.next = newc;
- }
- // We've done a merge, so keep the same prev
- c = prev;
- }
- subjectTable.clear();
- subjectTable = null;
- }
- /**
- * Delete any empty or dummy ThreadContainers
- *
- * @param parent
- */
- private void pruneEmptyContainers(final NntpThreadContainer parent) {
- NntpThreadContainer container, prev, next;
- for (prev = null, container = parent.child, next = container.next; container != null; prev = container, container = next, next = container == null
- ? null
- : container.next) {
- // Is it empty and without any children? If so,delete it
- if (container.threadable == null && container.child == null) {
- if (prev == null) {
- parent.child = container.next;
- } else {
- prev.next = container.next;
- }
- // Set container to prev so that prev keeps its same value the next time through the loop
- container = prev;
- }
- // Else if empty, with kids, and (not at root or only one kid)
- else if (container.threadable == null && (container.parent != null || container.child.next == null)) {
- // We have an invalid/expired message with kids. Promote the kids to this level.
- NntpThreadContainer tail;
- final NntpThreadContainer kids = container.child;
- // Remove this container and replace with 'kids'.
- if (prev == null) {
- parent.child = kids;
- } else {
- prev.next = kids;
- }
- // Make each child's parent be this level's parent -> i.e. promote the children.
- // Make the last child's next point to this container's next
- // i.e. splice kids into the list in place of container
- for (tail = kids; tail.next != null; tail = tail.next) {
- tail.parent = container.parent;
- }
- tail.parent = container.parent;
- tail.next = container.next;
- // next currently points to the item after the inserted items in the chain - reset that, so we process the newly
- // promoted items next time round
- next = kids;
- // Set container to prev so that prev keeps its same value the next time through the loop
- container = prev;
- } else if (container.child != null) {
- // A real message , with kids
- // Iterate over the children
- pruneEmptyContainers(container);
- }
- }
- }
- /**
- * The client passes in a list of Iterable objects, and the Threader constructs a connected 'graph' of messages
- *
- * @param messages iterable of messages to thread, must not be empty
- * @return null if messages == null or root.child == null or messages list is empty
- * @since 3.0
- */
- public Threadable thread(final Iterable<? extends Threadable> messages) {
- if (messages == null) {
- return null;
- }
- HashMap<String, NntpThreadContainer> idTable = new HashMap<>();
- // walk through each Threadable element
- for (final Threadable t : messages) {
- if (!t.isDummy()) {
- buildContainer(t, idTable);
- }
- }
- if (idTable.isEmpty()) {
- return null;
- }
- final NntpThreadContainer root = findRootSet(idTable);
- idTable.clear();
- idTable = null;
- pruneEmptyContainers(root);
- root.reverseChildren();
- gatherSubjects(root);
- if (root.next != null) {
- throw new IllegalStateException("root node has a next:" + root);
- }
- for (NntpThreadContainer r = root.child; r != null; r = r.next) {
- if (r.threadable == null) {
- r.threadable = r.child.threadable.makeDummy();
- }
- }
- final Threadable result = root.child == null ? null : root.child.threadable;
- root.flush();
- return result;
- }
- /**
- * The client passes in a list of Threadable objects, and the Threader constructs a connected 'graph' of messages
- *
- * @param messages list of messages to thread, must not be empty
- * @return null if messages == null or root.child == null or messages list is empty
- * @since 2.2
- */
- public Threadable thread(final List<? extends Threadable> messages) {
- return thread((Iterable<? extends Threadable>) messages);
- }
- // DEPRECATED METHODS - for API compatibility only - DO NOT USE
- /**
- * The client passes in an array of Threadable objects, and the Threader constructs a connected 'graph' of messages
- *
- * @param messages array of messages to thread, must not be empty
- * @return null if messages == null or root.child == null or messages array is empty
- * @deprecated (2.2) prefer {@link #thread(List)}
- */
- @Deprecated
- public Threadable thread(final Threadable[] messages) {
- if (messages == null) {
- return null;
- }
- return thread(Arrays.asList(messages));
- }
- }