View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  
19  package org.apache.commons.net.nntp;
20  
21  /**
22   * This is an implementation of a message threading algorithm, as originally devised by Zamie Zawinski.
23   * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details.
24   * For his Java implementation, see
25   * <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java">
26   * http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a>;
27   */
28  
29  import java.util.Arrays;
30  import java.util.HashMap;
31  import java.util.Iterator;
32  import java.util.List;
33  
34  public class Threader {
35  
36      /**
37       * The client passes in a list of Threadable objects, and
38       * the Threader constructs a connected 'graph' of messages
39       * @param messages list of messages to thread, must not be empty
40       * @return null if messages == null or root.child == null or messages list is empty
41       * @since 2.2
42       */
43      public Threadable thread(List<? extends Threadable> messages) {
44          return thread((Iterable<? extends Threadable>)messages);
45      }
46  
47      /**
48       * The client passes in a list of Iterable objects, and
49       * the Threader constructs a connected 'graph' of messages
50       * @param messages iterable of messages to thread, must not be empty
51       * @return null if messages == null or root.child == null or messages list is empty
52       * @since 3.0
53       */
54      public Threadable thread(Iterable<? extends Threadable> messages) {
55          if (messages == null) {
56              return null;
57          }
58  
59          HashMap<String,ThreadContainer> idTable = new HashMap<String,ThreadContainer>();
60  
61          // walk through each Threadable element
62          for (Threadable t : messages) {
63              if (!t.isDummy()) {
64                  buildContainer(t, idTable);
65              }
66          }
67  
68          if (idTable.isEmpty()) {
69              return null;
70          }
71  
72          ThreadContainer root = findRootSet(idTable);
73          idTable.clear();
74          idTable = null;
75  
76          pruneEmptyContainers(root);
77  
78          root.reverseChildren();
79          gatherSubjects(root);
80  
81          if (root.next != null) {
82              throw new RuntimeException("root node has a next:" + root);
83          }
84  
85          for (ThreadContainer r = root.child; r != null; r = r.next) {
86              if (r.threadable == null) {
87                  r.threadable = r.child.threadable.makeDummy();
88              }
89          }
90  
91          Threadable result = (root.child == null ? null : root.child.threadable);
92          root.flush();
93  
94          return result;
95      }
96  
97      /**
98       *
99       * @param threadable
100      * @param idTable
101      */
102     private void buildContainer(Threadable threadable, HashMap<String,ThreadContainer> idTable) {
103         String id = threadable.messageThreadId();
104         ThreadContainer container = idTable.get(id);
105         int bogusIdCount = 0;
106 
107         // A ThreadContainer exists for this id already. This should be a forward reference, but may
108         // be a duplicate id, in which case we will need to generate a bogus placeholder id
109         if (container != null) {
110             if (container.threadable != null) { // oops! duplicate ids...
111                 id = "<Bogus-id:" + (bogusIdCount++) + ">";
112                 container = null;
113             } else {
114                 // The container just contained a forward reference to this message, so let's
115                 // fill in the threadable field of the container with this message
116                 container.threadable = threadable;
117             }
118         }
119 
120         // No container exists for that message Id. Create one and insert it into the hash table.
121         if (container == null) {
122             container = new ThreadContainer();
123             container.threadable = threadable;
124             idTable.put(id, container);
125         }
126 
127         // Iterate through all of the references and create ThreadContainers for any references that
128         // don't have them.
129         ThreadContainer parentRef = null;
130         {
131             String[] references = threadable.messageThreadReferences();
132             for (String refString : references)
133             {
134                 ThreadContainer ref = idTable.get(refString);
135 
136                 // if this id doesnt have a container, create one
137                 if (ref == null) {
138                     ref = new ThreadContainer();
139                     idTable.put(refString, ref);
140                 }
141 
142                 // Link references together in the order they appear in the References: header,
143                 // IF they dont have a have a parent already &&
144                 // IF it will not cause a circular reference
145                 if ((parentRef != null)
146                     && (ref.parent == null)
147                     && (parentRef != ref)
148                     && !(ref.findChild(parentRef))) {
149                     // Link ref into the parent's child list
150                     ref.parent = parentRef;
151                     ref.next = parentRef.child;
152                     parentRef.child = ref;
153                 }
154                 parentRef = ref;
155             }
156         }
157 
158         // parentRef is now set to the container of the last element in the references field. make that
159         // be the parent of this container, unless doing so causes a circular reference
160         if (parentRef != null
161             && (parentRef == container || container.findChild(parentRef)))
162         {
163             parentRef = null;
164         }
165 
166         // if it has a parent already, its because we saw this message in a References: field, and presumed
167         // a parent based on the other entries in that field. Now that we have the actual message, we can
168         // throw away the old parent and use this new one
169         if (container.parent != null) {
170             ThreadContainer rest, prev;
171 
172             for (prev = null, rest = container.parent.child;
173                 rest != null;
174                 prev = rest, rest = rest.next) {
175                 if (rest == container) {
176                     break;
177                 }
178             }
179 
180             if (rest == null) {
181                 throw new RuntimeException(
182                     "Didnt find "
183                         + container
184                         + " in parent"
185                         + container.parent);
186             }
187 
188             // Unlink this container from the parent's child list
189             if (prev == null) {
190                 container.parent.child = container.next;
191             } else {
192                 prev.next = container.next;
193             }
194 
195             container.next = null;
196             container.parent = null;
197         }
198 
199         // If we have a parent, link container into the parents child list
200         if (parentRef != null) {
201             container.parent = parentRef;
202             container.next = parentRef.child;
203             parentRef.child = container;
204         }
205     }
206 
207     /**
208      * Find the root set of all existing ThreadContainers
209      * @param idTable
210      * @return root the ThreadContainer representing the root node
211      */
212     private ThreadContainer findRootSet(HashMap<String, ThreadContainer> idTable) {
213         ThreadContainer root = new ThreadContainer();
214         Iterator<String> iter = idTable.keySet().iterator();
215 
216         while (iter.hasNext()) {
217             Object key = iter.next();
218             ThreadContainer c = idTable.get(key);
219             if (c.parent == null) {
220                 if (c.next != null) {
221                     throw new RuntimeException(
222                             "c.next is " + c.next.toString());
223                 }
224                 c.next = root.child;
225                 root.child = c;
226             }
227         }
228         return root;
229     }
230 
231     /**
232      * Delete any empty or dummy ThreadContainers
233      * @param parent
234      */
235     private void pruneEmptyContainers(ThreadContainer parent) {
236         ThreadContainer container, prev, next;
237         for (prev = null, container = parent.child, next = container.next;
238             container != null;
239             prev = container,
240                 container = next,
241                 next = (container == null ? null : container.next)) {
242 
243             // Is it empty and without any children? If so,delete it
244             if (container.threadable == null && container.child == null) {
245                 if (prev == null) {
246                     parent.child = container.next;
247                 } else {
248                     prev.next = container.next;
249                 }
250 
251                 // Set container to prev so that prev keeps its same value the next time through the loop
252                 container = prev;
253             }
254 
255             // Else if empty, with kids, and (not at root or only one kid)
256             else if (
257                 container.threadable == null
258                     && container.child != null
259                     && (container.parent != null
260                         || container.child.next == null)) {
261                 // We have an invalid/expired message with kids. Promote the kids to this level.
262                 ThreadContainer tail;
263                 ThreadContainer kids = container.child;
264 
265                 // Remove this container and replace with 'kids'.
266                 if (prev == null) {
267                     parent.child = kids;
268                 } else {
269                     prev.next = kids;
270                 }
271 
272                 // Make each child's parent be this level's parent -> i.e. promote the children.
273                 // Make the last child's next point to this container's next
274                 // i.e. splice kids into the list in place of container
275                 for (tail = kids; tail.next != null; tail = tail.next) {
276                     tail.parent = container.parent;
277                 }
278 
279                 tail.parent = container.parent;
280                 tail.next = container.next;
281 
282                 // next currently points to the item after the inserted items in the chain - reset that so we process the newly
283                 // promoted items next time round
284                 next = kids;
285 
286                 // Set container to prev so that prev keeps its same value the next time through the loop
287                 container = prev;
288             } else if (container.child != null) {
289                 // A real message , with kids
290                 // Iterate over the children
291                 pruneEmptyContainers(container);
292             }
293         }
294     }
295 
296     /**
297      *  If any two members of the root set have the same subject, merge them.
298      *  This is to attempt to accomodate messages without References: headers.
299      * @param root
300      */
301     private void gatherSubjects(ThreadContainer root) {
302 
303         int count = 0;
304 
305         for (ThreadContainer c = root.child; c != null; c = c.next) {
306             count++;
307         }
308 
309         // TODO verify this will avoid rehashing
310         HashMap<String, ThreadContainer> subjectTable = new HashMap<String, ThreadContainer>((int) (count * 1.2), (float) 0.9);
311         count = 0;
312 
313         for (ThreadContainer c = root.child; c != null; c = c.next) {
314             Threadable threadable = c.threadable;
315 
316             // No threadable? If so, it is a dummy node in the root set.
317             // Only root set members may be dummies, and they alway have at least 2 kids
318             // Take the first kid as representative of the subject
319             if (threadable == null) {
320                 threadable = c.child.threadable;
321             }
322 
323             String subj = threadable.simplifiedSubject();
324 
325             if (subj == null || subj.length() == 0) {
326                 continue;
327             }
328 
329             ThreadContainer old = subjectTable.get(subj);
330 
331             // Add this container to the table iff:
332             // - There exists no container with this subject
333             // - or this is a dummy container and the old one is not - the dummy one is
334             // more interesting as a root, so put it in the table instead
335             // - The container in the table has a "Re:" version of this subject, and
336             // this container has a non-"Re:" version of this subject. The non-"Re:" version
337             // is the more interesting of the two.
338             if (old == null
339                 || (c.threadable == null && old.threadable != null)
340                 || (old.threadable != null
341                     && old.threadable.subjectIsReply()
342                     && c.threadable != null
343                     && !c.threadable.subjectIsReply())) {
344                 subjectTable.put(subj, c);
345                 count++;
346             }
347         }
348 
349         // If the table is empty, we're done
350         if (count == 0) {
351             return;
352         }
353 
354         // subjectTable is now populated with one entry for each subject which occurs in the
355         // root set. Iterate over the root set, and gather together the difference.
356         ThreadContainer prev, c, rest;
357         for (prev = null, c = root.child, rest = c.next;
358             c != null;
359             prev = c, c = rest, rest = (rest == null ? null : rest.next)) {
360             Threadable threadable = c.threadable;
361 
362             // is it a dummy node?
363             if (threadable == null) {
364                 threadable = c.child.threadable;
365             }
366 
367             String subj = threadable.simplifiedSubject();
368 
369             // Dont thread together all subjectless messages
370             if (subj == null || subj.length() == 0) {
371                 continue;
372             }
373 
374             ThreadContainer old = subjectTable.get(subj);
375 
376             if (old == c) { // That's us
377                 continue;
378             }
379 
380             // We have now found another container in the root set with the same subject
381             // Remove the "second" message from the root set
382             if (prev == null) {
383                 root.child = c.next;
384             } else {
385                 prev.next = c.next;
386             }
387             c.next = null;
388 
389             if (old.threadable == null && c.threadable == null) {
390                 // both dummies - merge them
391                 ThreadContainer tail;
392                 for (tail = old.child;
393                     tail != null && tail.next != null;
394                     tail = tail.next) {
395                     // do nothing
396                 }
397 
398                 if (tail != null) { // protect against possible NPE
399                     tail.next = c.child;
400                 }
401 
402                 for (tail = c.child; tail != null; tail = tail.next) {
403                     tail.parent = old;
404                 }
405 
406                 c.child = null;
407             } else if (
408                 old.threadable == null
409                     || (c.threadable != null
410                         && c.threadable.subjectIsReply()
411                         && !old.threadable.subjectIsReply())) {
412                 // Else if old is empty, or c has "Re:" and old does not  ==> make this message a child of old
413                 c.parent = old;
414                 c.next = old.child;
415                 old.child = c;
416             } else {
417                 // else make the old and new messages be children of a new dummy container.
418                 // We create a new container object for old.msg and empty the old container
419                 ThreadContainer newc = new ThreadContainer();
420                 newc.threadable = old.threadable;
421                 newc.child = old.child;
422 
423                 for (ThreadContainer tail = newc.child;
424                     tail != null;
425                     tail = tail.next)
426                 {
427                     tail.parent = newc;
428                 }
429 
430                 old.threadable = null;
431                 old.child = null;
432 
433                 c.parent = old;
434                 newc.parent = old;
435 
436                 // Old is now a dummy- give it 2 kids , c and newc
437                 old.child = c;
438                 c.next = newc;
439             }
440             // We've done a merge, so keep the same prev
441             c = prev;
442         }
443 
444         subjectTable.clear();
445         subjectTable = null;
446 
447     }
448 
449 
450     // DEPRECATED METHODS - for API compatibility only - DO NOT USE
451 
452     /**
453      * The client passes in an array of Threadable objects, and
454      * the Threader constructs a connected 'graph' of messages
455      * @param messages array of messages to thread, must not be empty
456      * @return null if messages == null or root.child == null or messages array is empty
457      * @deprecated (2.2) prefer {@link #thread(List)}
458      */
459     @Deprecated
460     public Threadable thread(Threadable[] messages) {
461         if (messages == null) {
462             return null;
463         }
464         return thread(Arrays.asList(messages));
465     }
466 
467 }