001    /*
002     * Copyright 2001-2005 The Apache Software Foundation
003     *
004     * Licensed under the Apache License, Version 2.0 (the "License");
005     * you may not use this file except in compliance with the License.
006     * You may obtain a copy of the License at
007     *
008     *     http://www.apache.org/licenses/LICENSE-2.0
009     *
010     * Unless required by applicable law or agreed to in writing, software
011     * distributed under the License is distributed on an "AS IS" BASIS,
012     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013     * See the License for the specific language governing permissions and
014     * limitations under the License.
015     */
016    
017    package org.apache.commons.net.nntp;
018    
019    /**
020     * This is an implementation of a message threading algorithm, as originally devised by Zamie Zawinski.
021     * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details.
022     * For his Java implementation, see <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java">http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a>
023     *  
024     * @author rwinston <rwinston@checkfree.com>
025     *
026     */
027    
028    import java.util.HashMap;
029    import java.util.Iterator;
030    
031    public class Threader {
032            private ThreadContainer root;
033            private HashMap idTable;
034            private int bogusIdCount = 0;
035    
036            /**
037             * The main threader entry point - The client passes in an array of Threadable objects, and 
038             * the Threader constructs a connected 'graph' of messages
039             * @param messages
040             * @return
041             */
042            public Threadable thread(Threadable[] messages) {
043                    if (messages == null)
044                            return null;
045    
046                    idTable = new HashMap();
047    
048                    // walk through each Threadable element
049                    for (int i = 0; i < messages.length; ++i) {
050                            if (!messages[i].isDummy())
051                                    buildContainer(messages[i]);
052                    }
053    
054                    root = findRootSet();
055                    idTable.clear();
056                    idTable = null;
057    
058                    pruneEmptyContainers(root);
059    
060                    root.reverseChildren();
061                    gatherSubjects();
062    
063                    if (root.next != null)
064                            throw new RuntimeException("root node has a next:" + root);
065    
066                    for (ThreadContainer r = root.child; r != null; r = r.next) {
067                            if (r.threadable == null)
068                                    r.threadable = r.child.threadable.makeDummy();
069                    }
070    
071                    Threadable result = (root.child == null ? null : root.child.threadable);
072                    root.flush();
073                    root = null;
074    
075                    return result;
076            }
077    
078            /**
079             * 
080             * @param threadable
081             */
082            private void buildContainer(Threadable threadable) {
083                    String id = threadable.messageThreadId();
084                    ThreadContainer container = (ThreadContainer) idTable.get(id);
085    
086                    // A ThreadContainer exists for this id already. This should be a forward reference, but may 
087                    // be a duplicate id, in which case we will need to generate a bogus placeholder id
088                    if (container != null) {
089                            if (container.threadable != null) { // oops! duplicate ids...
090                                    id = "<Bogus-id:" + (bogusIdCount++) + ">";
091                                    container = null;
092                            } else {
093                                    // The container just contained a forward reference to this message, so let's
094                                    // fill in the threadable field of the container with this message
095                                    container.threadable = threadable;
096                            }
097                    }
098    
099                    // No container exists for that message Id. Create one and insert it into the hash table.
100                    if (container == null) {
101                            container = new ThreadContainer();
102                            container.threadable = threadable;
103                            idTable.put(id, container);
104                    }
105    
106                    // Iterate through all of the references and create ThreadContainers for any references that
107                    // don't have them.
108                    ThreadContainer parentRef = null;
109                    {
110                            String[] references = threadable.messageThreadReferences();
111                            for (int i = 0; i < references.length; ++i) {
112                                    String refString = references[i];
113                                    ThreadContainer ref = (ThreadContainer) idTable.get(refString);
114    
115                                    // if this id doesnt have a container, create one
116                                    if (ref == null) {
117                                            ref = new ThreadContainer();
118                                            idTable.put(refString, ref);
119                                    }
120    
121                                    // Link references together in the order they appear in the References: header,
122                                    // IF they dont have a have a parent already &&
123                                    // IF it will not cause a circular reference
124                                    if ((parentRef != null)
125                                            && (ref.parent == null)
126                                            && (parentRef != ref)
127                                            && !(parentRef.findChild(ref))) {
128                                            // Link ref into the parent's child list
129                                            ref.parent = parentRef;
130                                            ref.next = parentRef.child;
131                                            parentRef.child = ref;
132                                    }
133                                    parentRef = ref;
134                            }
135                    }
136    
137                    // parentRef is now set to the container of the last element in the references field. make that 
138                    // be the parent of this container, unless doing so causes a circular reference
139                    if (parentRef != null
140                            && (parentRef == container || container.findChild(parentRef)))
141                            parentRef = null;
142    
143                    // if it has a parent already, its because we saw this message in a References: field, and presumed
144                    // a parent based on the other entries in that field. Now that we have the actual message, we can
145                    // throw away the old parent and use this new one
146                    if (container.parent != null) {
147                            ThreadContainer rest, prev;
148    
149                            for (prev = null, rest = container.parent.child;
150                                    rest != null;
151                                    prev = rest, rest = rest.next) {
152                                    if (rest == container)
153                                            break;
154                            }
155    
156                            if (rest == null) {
157                                    throw new RuntimeException(
158                                            "Didnt find "
159                                                    + container
160                                                    + " in parent"
161                                                    + container.parent);
162                            }
163    
164                            // Unlink this container from the parent's child list
165                            if (prev == null)
166                                    container.parent.child = container.next;
167                            else
168                                    prev.next = container.next;
169    
170                            container.next = null;
171                            container.parent = null;
172                    }
173    
174                    // If we have a parent, link container into the parents child list
175                    if (parentRef != null) {
176                            container.parent = parentRef;
177                            container.next = parentRef.child;
178                            parentRef.child = container;
179                    }
180            }
181    
182            /**
183             * Find the root set of all existing ThreadContainers
184             * @return root the ThreadContainer representing the root node
185             */
186            private ThreadContainer findRootSet() {
187                    ThreadContainer root = new ThreadContainer();
188                    Iterator iter = idTable.keySet().iterator();
189    
190                    while (iter.hasNext()) {
191                            Object key = iter.next();
192                            ThreadContainer c = (ThreadContainer) idTable.get(key);
193                            if (c.parent == null) {
194                                    if (c.next != null)
195                                            throw new RuntimeException(
196                                                    "c.next is " + c.next.toString());
197                                    c.next = root.child;
198                                    root.child = c;
199                            }
200                    }
201                    return root;
202            }
203    
204            /**
205             * Delete any empty or dummy ThreadContainers
206             * @param parent
207             */
208            private void pruneEmptyContainers(ThreadContainer parent) {
209                    ThreadContainer container, prev, next;
210                    for (prev = null, container = parent.child, next = container.next;
211                            container != null;
212                            prev = container,
213                                    container = next,
214                                    next = (container == null ? null : container.next)) {
215    
216                            // Is it empty and without any children? If so,delete it 
217                            if (container.threadable == null && container.child == null) {
218                                    if (prev == null)
219                                            parent.child = container.next;
220                                    else
221                                            prev.next = container.next;
222    
223                                    // Set container to prev so that prev keeps its same value the next time through the loop       
224                                    container = prev;
225                            }
226    
227                            // Else if empty, with kids, and (not at root or only one kid)
228                            else if (
229                                    container.threadable == null
230                                            && container.child != null
231                                            && (container.parent != null
232                                                    || container.child.next == null)) {
233                                    // We have an invalid/expired message with kids. Promote the kids to this level. 
234                                    ThreadContainer tail;
235                                    ThreadContainer kids = container.child;
236    
237                                    // Remove this container and replace with 'kids'.
238                                    if (prev == null)
239                                            parent.child = kids;
240                                    else
241                                            prev.next = kids;
242    
243                                    // Make each child's parent be this level's parent -> i.e. promote the children. Make the last child's next point to this container's next
244                                    // i.e. splice kids into the list in place of container
245                                    for (tail = kids; tail.next != null; tail = tail.next)
246                                            tail.parent = container.parent;
247    
248                                    tail.parent = container.parent;
249                                    tail.next = container.next;
250    
251                                    // next currently points to the item after the inserted items in the chain - reset that so we process the newly
252                                    // promoted items next time round
253                                    next = kids;
254    
255                                    // Set container to prev so that prev keeps its same value the next time through the loop
256                                    container = prev;
257                            } else if (container.child != null) {
258                                    // A real message , with kids
259                                    // Iterate over the children
260                                    pruneEmptyContainers(container);
261                            }
262                    }
263            }
264    
265            /**
266             *  If any two members of the root set have the same subject, merge them. This is to attempt to accomodate messages without References: headers. 
267             */
268            private void gatherSubjects() {
269    
270                    int count = 0;
271    
272                    for (ThreadContainer c = root.child; c != null; c = c.next)
273                            count++;
274    
275                    // TODO verify this will avoid rehashing
276                    HashMap subjectTable = new HashMap((int) (count * 1.2), (float) 0.9);
277                    count = 0;
278    
279                    for (ThreadContainer c = root.child; c != null; c = c.next) {
280                            Threadable threadable = c.threadable;
281    
282                            // No threadable? If so, it is a dummy node in the root set.
283                            // Only root set members may be dummies, and they alway have at least 2 kids
284                            // Take the first kid as representative of the subject
285                            if (threadable == null)
286                                    threadable = c.child.threadable;
287    
288                            String subj = threadable.simplifiedSubject();
289    
290                            if (subj == null || subj == "")
291                                    continue;
292    
293                            ThreadContainer old = (ThreadContainer) subjectTable.get(subj);
294    
295                            // Add this container to the table iff:
296                            // - There exists no container with this subject
297                            // - or this is a dummy container and the old one is not - the dummy one is
298                            // more interesting as a root, so put it in the table instead
299                            // - The container in the table has a "Re:" version of this subject, and 
300                            // this container has a non-"Re:" version of this subject. The non-"Re:" version
301                            // is the more interesting of the two.
302                            if (old == null
303                                    || (c.threadable == null && old.threadable != null)
304                                    || (old.threadable != null
305                                            && old.threadable.subjectIsReply()
306                                            && c.threadable != null
307                                            && !c.threadable.subjectIsReply())) {
308                                    subjectTable.put(subj, c);
309                                    count++;
310                            }
311                    }
312    
313                    // If the table is empty, we're done
314                    if (count == 0)
315                            return;
316    
317                    // subjectTable is now populated with one entry for each subject which occurs in the 
318                    // root set. Iterate over the root set, and gather together the difference.
319                    ThreadContainer prev, c, rest;
320                    for (prev = null, c = root.child, rest = c.next;
321                            c != null;
322                            prev = c, c = rest, rest = (rest == null ? null : rest.next)) {
323                            Threadable threadable = c.threadable;
324    
325                            // is it a dummy node?
326                            if (threadable == null)
327                                    threadable = c.child.threadable;
328    
329                            String subj = threadable.simplifiedSubject();
330    
331                            // Dont thread together all subjectless messages
332                            if (subj == null || subj == "")
333                                    continue;
334    
335                            ThreadContainer old = (ThreadContainer) subjectTable.get(subj);
336    
337                            if (old == c) // That's us
338                                    continue;
339    
340                            // We have now found another container in the root set with the same subject
341                            // Remove the "second" message from the root set
342                            if (prev == null)
343                                    root.child = c.next;
344                            else
345                                    prev.next = c.next;
346                            c.next = null;
347    
348                            if (old.threadable == null && c.threadable == null) {
349                                    // both dummies - merge them
350                                    ThreadContainer tail;
351                                    for (tail = old.child;
352                                            tail != null && tail.next != null;
353                                            tail = tail.next);
354    
355                                    tail.next = c.child;
356    
357                                    for (tail = c.child; tail != null; tail = tail.next)
358                                            tail.parent = old;
359    
360                                    c.child = null;
361                            } else if (
362                                    old.threadable == null
363                                            || (c.threadable != null
364                                                    && c.threadable.subjectIsReply()
365                                                    && !old.threadable.subjectIsReply())) {
366                                    // Else if old is empty, or c has "Re:" and old does not  ==> make this message a child of old
367                                    c.parent = old;
368                                    c.next = old.child;
369                                    old.child = c;
370                            } else {
371                                    //      else make the old and new messages be children of a new dummy container.
372                                    // We create a new container object for old.msg and empty the old container
373                                    ThreadContainer newc = new ThreadContainer();
374                                    newc.threadable = old.threadable;
375                                    newc.child = old.child;
376    
377                                    for (ThreadContainer tail = newc.child;
378                                            tail != null;
379                                            tail = tail.next)
380                                            tail.parent = newc;
381    
382                                    old.threadable = null;
383                                    old.child = null;
384    
385                                    c.parent = old;
386                                    newc.parent = old;
387    
388                                    // Old is now a dummy- give it 2 kids , c and newc
389                                    old.child = c;
390                                    c.next = newc;
391                            }
392                            // We've done a merge, so keep the same prev
393                            c = prev;
394                    }
395    
396                    subjectTable.clear();
397                    subjectTable = null;
398    
399            }
400    }
401    
402    /**
403     * A placeholder utility class, used for constructing a tree of Threadables
404     * Originall implementation by Jamie Zawinski. 
405     * See the Grendel source for more details <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java#511">here</a>
406     * Threadable objects
407     * @author Rory Winston <rwinston@checkfree.com>
408     */
409    class ThreadContainer {
410            Threadable threadable;
411            ThreadContainer parent;
412            ThreadContainer prev;
413            ThreadContainer next;
414            ThreadContainer child;
415    
416            /**
417             * 
418             * @param container
419             * @return true if child is under self's tree. Detects circular references
420             */
421            boolean findChild(ThreadContainer target) {
422                    if (child == null)
423                            return false;
424    
425                    else if (child == target)
426                            return true;
427                    else
428                            return child.findChild(target);
429            }
430    
431            // Copy the ThreadContainer tree structure down into the underlying Threadable objects
432            // (Make the Threadable tree look like the ThreadContainer tree)
433            void flush() {
434                    if (parent != null && threadable == null)
435                            throw new RuntimeException("no threadable in " + this.toString());
436    
437                    parent = null;
438    
439                    if (threadable != null)
440                            threadable.setChild(child == null ? null : child.threadable);
441    
442                    if (child != null) {
443                            child.flush();
444                            child = null;
445                    }
446    
447                    if (threadable != null)
448                            threadable.setNext(next == null ? null : next.threadable);
449    
450                    if (next != null) {
451                            next.flush();
452                            next = null;
453                    }
454    
455                    threadable = null;
456            }
457    
458            /**
459             * Reverse the entire set of children
460             *
461             */
462            void reverseChildren() {
463                    if (child != null) {
464                            ThreadContainer kid, prev, rest;
465                            for (prev = null, kid = child, rest = kid.next;
466                                    kid != null;
467                                    prev = kid,
468                                            kid = rest,
469                                            rest = (rest == null ? null : rest.next))
470                                    kid.next = prev;
471    
472                            child = prev;
473    
474                            // Do it for the kids 
475                            for (kid = child; kid != null; kid = kid.next)
476                                    kid.reverseChildren();
477                    }
478            }
479    }