001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018
019package org.apache.commons.net.nntp;
020
021/**
022 * This is an implementation of a message threading algorithm, as originally devised by Zamie Zawinski.
023 * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details.
024 * For his Java implementation, see
025 * <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java">
026 * http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a>
027 */
028
029import java.util.Arrays;
030import java.util.HashMap;
031import java.util.Iterator;
032import java.util.List;
033import java.util.Map;
034
035public class Threader {
036
037    /**
038     * The client passes in a list of Threadable objects, and
039     * the Threader constructs a connected 'graph' of messages
040     * @param messages list of messages to thread, must not be empty
041     * @return null if messages == null or root.child == null or messages list is empty
042     * @since 2.2
043     */
044    public Threadable thread(List<? extends Threadable> messages) {
045        return thread((Iterable<? extends Threadable>)messages);
046    }
047
048    /**
049     * The client passes in a list of Iterable objects, and
050     * the Threader constructs a connected 'graph' of messages
051     * @param messages iterable of messages to thread, must not be empty
052     * @return null if messages == null or root.child == null or messages list is empty
053     * @since 3.0
054     */
055    public Threadable thread(Iterable<? extends Threadable> messages) {
056        if (messages == null) {
057            return null;
058        }
059
060        HashMap<String,ThreadContainer> idTable = new HashMap<String,ThreadContainer>();
061
062        // walk through each Threadable element
063        for (Threadable t : messages) {
064            if (!t.isDummy()) {
065                buildContainer(t, idTable);
066            }
067        }
068
069        if (idTable.isEmpty()) {
070            return null;
071        }
072
073        ThreadContainer root = findRootSet(idTable);
074        idTable.clear();
075        idTable = null;
076
077        pruneEmptyContainers(root);
078
079        root.reverseChildren();
080        gatherSubjects(root);
081
082        if (root.next != null) {
083            throw new RuntimeException("root node has a next:" + root);
084        }
085
086        for (ThreadContainer r = root.child; r != null; r = r.next) {
087            if (r.threadable == null) {
088                r.threadable = r.child.threadable.makeDummy();
089            }
090        }
091
092        Threadable result = (root.child == null ? null : root.child.threadable);
093        root.flush();
094
095        return result;
096    }
097
098    /**
099     *
100     * @param threadable
101     * @param idTable
102     */
103    private void buildContainer(Threadable threadable, HashMap<String,ThreadContainer> idTable) {
104        String id = threadable.messageThreadId();
105        ThreadContainer container = idTable.get(id);
106        int bogusIdCount = 0;
107
108        // A ThreadContainer exists for this id already. This should be a forward reference, but may
109        // be a duplicate id, in which case we will need to generate a bogus placeholder id
110        if (container != null) {
111            if (container.threadable != null) { // oops! duplicate ids...
112                bogusIdCount++ ; // Avoid dead local store warning
113                id = "<Bogus-id:" + (bogusIdCount) + ">";
114                container = null;
115            } else {
116                // The container just contained a forward reference to this message, so let's
117                // fill in the threadable field of the container with this message
118                container.threadable = threadable;
119            }
120        }
121
122        // No container exists for that message Id. Create one and insert it into the hash table.
123        if (container == null) {
124            container = new ThreadContainer();
125            container.threadable = threadable;
126            idTable.put(id, container);
127        }
128
129        // Iterate through all of the references and create ThreadContainers for any references that
130        // don't have them.
131        ThreadContainer parentRef = null;
132        {
133            String[] references = threadable.messageThreadReferences();
134            for (String refString : references)
135            {
136                ThreadContainer ref = idTable.get(refString);
137
138                // if this id doesnt have a container, create one
139                if (ref == null) {
140                    ref = new ThreadContainer();
141                    idTable.put(refString, ref);
142                }
143
144                // Link references together in the order they appear in the References: header,
145                // IF they dont have a have a parent already &&
146                // IF it will not cause a circular reference
147                if ((parentRef != null)
148                    && (ref.parent == null)
149                    && (parentRef != ref)
150                    && !(ref.findChild(parentRef))) {
151                    // Link ref into the parent's child list
152                    ref.parent = parentRef;
153                    ref.next = parentRef.child;
154                    parentRef.child = ref;
155                }
156                parentRef = ref;
157            }
158        }
159
160        // parentRef is now set to the container of the last element in the references field. make that
161        // be the parent of this container, unless doing so causes a circular reference
162        if (parentRef != null
163            && (parentRef == container || container.findChild(parentRef)))
164        {
165            parentRef = null;
166        }
167
168        // if it has a parent already, its because we saw this message in a References: field, and presumed
169        // a parent based on the other entries in that field. Now that we have the actual message, we can
170        // throw away the old parent and use this new one
171        if (container.parent != null) {
172            ThreadContainer rest, prev;
173
174            for (prev = null, rest = container.parent.child;
175                rest != null;
176                prev = rest, rest = rest.next) {
177                if (rest == container) {
178                    break;
179                }
180            }
181
182            if (rest == null) {
183                throw new RuntimeException(
184                    "Didnt find "
185                        + container
186                        + " in parent"
187                        + container.parent);
188            }
189
190            // Unlink this container from the parent's child list
191            if (prev == null) {
192                container.parent.child = container.next;
193            } else {
194                prev.next = container.next;
195            }
196
197            container.next = null;
198            container.parent = null;
199        }
200
201        // If we have a parent, link container into the parents child list
202        if (parentRef != null) {
203            container.parent = parentRef;
204            container.next = parentRef.child;
205            parentRef.child = container;
206        }
207    }
208
209    /**
210     * Find the root set of all existing ThreadContainers
211     * @param idTable
212     * @return root the ThreadContainer representing the root node
213     */
214    private ThreadContainer findRootSet(HashMap<String, ThreadContainer> idTable) {
215        ThreadContainer root = new ThreadContainer();
216        Iterator<Map.Entry<String, ThreadContainer>> iter = idTable.entrySet().iterator();
217
218        while (iter.hasNext()) {
219            Map.Entry<String, ThreadContainer> entry = iter.next();
220            ThreadContainer c = entry.getValue();
221            if (c.parent == null) {
222                if (c.next != null) {
223                    throw new RuntimeException(
224                            "c.next is " + c.next.toString());
225                }
226                c.next = root.child;
227                root.child = c;
228            }
229        }
230        return root;
231    }
232
233    /**
234     * Delete any empty or dummy ThreadContainers
235     * @param parent
236     */
237    private void pruneEmptyContainers(ThreadContainer parent) {
238        ThreadContainer container, prev, next;
239        for (prev = null, container = parent.child, next = container.next;
240            container != null;
241            prev = container,
242                container = next,
243                next = (container == null ? null : container.next)) {
244
245            // Is it empty and without any children? If so,delete it
246            if (container.threadable == null && container.child == null) {
247                if (prev == null) {
248                    parent.child = container.next;
249                } else {
250                    prev.next = container.next;
251                }
252
253                // Set container to prev so that prev keeps its same value the next time through the loop
254                container = prev;
255            }
256
257            // Else if empty, with kids, and (not at root or only one kid)
258            else if (
259                container.threadable == null
260                    && container.child != null
261                    && (container.parent != null
262                        || container.child.next == null)) {
263                // We have an invalid/expired message with kids. Promote the kids to this level.
264                ThreadContainer tail;
265                ThreadContainer kids = container.child;
266
267                // Remove this container and replace with 'kids'.
268                if (prev == null) {
269                    parent.child = kids;
270                } else {
271                    prev.next = kids;
272                }
273
274                // Make each child's parent be this level's parent -> i.e. promote the children.
275                // Make the last child's next point to this container's next
276                // i.e. splice kids into the list in place of container
277                for (tail = kids; tail.next != null; tail = tail.next) {
278                    tail.parent = container.parent;
279                }
280
281                tail.parent = container.parent;
282                tail.next = container.next;
283
284                // next currently points to the item after the inserted items in the chain - reset that so we process the newly
285                // promoted items next time round
286                next = kids;
287
288                // Set container to prev so that prev keeps its same value the next time through the loop
289                container = prev;
290            } else if (container.child != null) {
291                // A real message , with kids
292                // Iterate over the children
293                pruneEmptyContainers(container);
294            }
295        }
296    }
297
298    /**
299     *  If any two members of the root set have the same subject, merge them.
300     *  This is to attempt to accomodate messages without References: headers.
301     * @param root
302     */
303    private void gatherSubjects(ThreadContainer root) {
304
305        int count = 0;
306
307        for (ThreadContainer c = root.child; c != null; c = c.next) {
308            count++;
309        }
310
311        // TODO verify this will avoid rehashing
312        HashMap<String, ThreadContainer> subjectTable = new HashMap<String, ThreadContainer>((int) (count * 1.2), (float) 0.9);
313        count = 0;
314
315        for (ThreadContainer c = root.child; c != null; c = c.next) {
316            Threadable threadable = c.threadable;
317
318            // No threadable? If so, it is a dummy node in the root set.
319            // Only root set members may be dummies, and they alway have at least 2 kids
320            // Take the first kid as representative of the subject
321            if (threadable == null) {
322                threadable = c.child.threadable;
323            }
324
325            String subj = threadable.simplifiedSubject();
326
327            if (subj == null || subj.length() == 0) {
328                continue;
329            }
330
331            ThreadContainer old = subjectTable.get(subj);
332
333            // Add this container to the table iff:
334            // - There exists no container with this subject
335            // - or this is a dummy container and the old one is not - the dummy one is
336            // more interesting as a root, so put it in the table instead
337            // - The container in the table has a "Re:" version of this subject, and
338            // this container has a non-"Re:" version of this subject. The non-"Re:" version
339            // is the more interesting of the two.
340            if (old == null
341                || (c.threadable == null && old.threadable != null)
342                || (old.threadable != null
343                    && old.threadable.subjectIsReply()
344                    && c.threadable != null
345                    && !c.threadable.subjectIsReply())) {
346                subjectTable.put(subj, c);
347                count++;
348            }
349        }
350
351        // If the table is empty, we're done
352        if (count == 0) {
353            return;
354        }
355
356        // subjectTable is now populated with one entry for each subject which occurs in the
357        // root set. Iterate over the root set, and gather together the difference.
358        ThreadContainer prev, c, rest;
359        for (prev = null, c = root.child, rest = c.next;
360            c != null;
361            prev = c, c = rest, rest = (rest == null ? null : rest.next)) {
362            Threadable threadable = c.threadable;
363
364            // is it a dummy node?
365            if (threadable == null) {
366                threadable = c.child.threadable;
367            }
368
369            String subj = threadable.simplifiedSubject();
370
371            // Dont thread together all subjectless messages
372            if (subj == null || subj.length() == 0) {
373                continue;
374            }
375
376            ThreadContainer old = subjectTable.get(subj);
377
378            if (old == c) { // That's us
379                continue;
380            }
381
382            // We have now found another container in the root set with the same subject
383            // Remove the "second" message from the root set
384            if (prev == null) {
385                root.child = c.next;
386            } else {
387                prev.next = c.next;
388            }
389            c.next = null;
390
391            if (old.threadable == null && c.threadable == null) {
392                // both dummies - merge them
393                ThreadContainer tail;
394                for (tail = old.child;
395                    tail != null && tail.next != null;
396                    tail = tail.next) {
397                    // do nothing
398                }
399
400                if (tail != null) { // protect against possible NPE
401                    tail.next = c.child;
402                }
403
404                for (tail = c.child; tail != null; tail = tail.next) {
405                    tail.parent = old;
406                }
407
408                c.child = null;
409            } else if (
410                old.threadable == null
411                    || (c.threadable != null
412                        && c.threadable.subjectIsReply()
413                        && !old.threadable.subjectIsReply())) {
414                // Else if old is empty, or c has "Re:" and old does not  ==> make this message a child of old
415                c.parent = old;
416                c.next = old.child;
417                old.child = c;
418            } else {
419                // else make the old and new messages be children of a new dummy container.
420                // We create a new container object for old.msg and empty the old container
421                ThreadContainer newc = new ThreadContainer();
422                newc.threadable = old.threadable;
423                newc.child = old.child;
424
425                for (ThreadContainer tail = newc.child;
426                    tail != null;
427                    tail = tail.next)
428                {
429                    tail.parent = newc;
430                }
431
432                old.threadable = null;
433                old.child = null;
434
435                c.parent = old;
436                newc.parent = old;
437
438                // Old is now a dummy- give it 2 kids , c and newc
439                old.child = c;
440                c.next = newc;
441            }
442            // We've done a merge, so keep the same prev
443            c = prev;
444        }
445
446        subjectTable.clear();
447        subjectTable = null;
448
449    }
450
451
452    // DEPRECATED METHODS - for API compatibility only - DO NOT USE
453
454    /**
455     * The client passes in an array of Threadable objects, and
456     * the Threader constructs a connected 'graph' of messages
457     * @param messages array of messages to thread, must not be empty
458     * @return null if messages == null or root.child == null or messages array is empty
459     * @deprecated (2.2) prefer {@link #thread(List)}
460     */
461    @Deprecated
462    public Threadable thread(Threadable[] messages) {
463        if (messages == null) {
464            return null;
465        }
466        return thread(Arrays.asList(messages));
467    }
468
469}