View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  
19  package org.apache.commons.net.nntp;
20  
21  /**
22   * This is an implementation of a message threading algorithm, as originally devised by Zamie Zawinski.
23   * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details.
24   * For his Java implementation, see
25   * <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java">
26   * http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a>;
27   */
28  
29  import java.util.Arrays;
30  import java.util.HashMap;
31  import java.util.Iterator;
32  import java.util.List;
33  import java.util.Map;
34  
35  public class Threader {
36  
37      /**
38       * The client passes in a list of Threadable objects, and
39       * the Threader constructs a connected 'graph' of messages
40       * @param messages list of messages to thread, must not be empty
41       * @return null if messages == null or root.child == null or messages list is empty
42       * @since 2.2
43       */
44      public Threadable thread(List<? extends Threadable> messages) {
45          return thread((Iterable<? extends Threadable>)messages);
46      }
47  
48      /**
49       * The client passes in a list of Iterable objects, and
50       * the Threader constructs a connected 'graph' of messages
51       * @param messages iterable of messages to thread, must not be empty
52       * @return null if messages == null or root.child == null or messages list is empty
53       * @since 3.0
54       */
55      public Threadable thread(Iterable<? extends Threadable> messages) {
56          if (messages == null) {
57              return null;
58          }
59  
60          HashMap<String,ThreadContainer> idTable = new HashMap<String,ThreadContainer>();
61  
62          // walk through each Threadable element
63          for (Threadable t : messages) {
64              if (!t.isDummy()) {
65                  buildContainer(t, idTable);
66              }
67          }
68  
69          if (idTable.isEmpty()) {
70              return null;
71          }
72  
73          ThreadContainer root = findRootSet(idTable);
74          idTable.clear();
75          idTable = null;
76  
77          pruneEmptyContainers(root);
78  
79          root.reverseChildren();
80          gatherSubjects(root);
81  
82          if (root.next != null) {
83              throw new RuntimeException("root node has a next:" + root);
84          }
85  
86          for (ThreadContainer r = root.child; r != null; r = r.next) {
87              if (r.threadable == null) {
88                  r.threadable = r.child.threadable.makeDummy();
89              }
90          }
91  
92          Threadable result = (root.child == null ? null : root.child.threadable);
93          root.flush();
94  
95          return result;
96      }
97  
98      /**
99       *
100      * @param threadable
101      * @param idTable
102      */
103     private void buildContainer(Threadable threadable, HashMap<String,ThreadContainer> idTable) {
104         String id = threadable.messageThreadId();
105         ThreadContainer container = idTable.get(id);
106         int bogusIdCount = 0;
107 
108         // A ThreadContainer exists for this id already. This should be a forward reference, but may
109         // be a duplicate id, in which case we will need to generate a bogus placeholder id
110         if (container != null) {
111             if (container.threadable != null) { // oops! duplicate ids...
112                 bogusIdCount++ ; // Avoid dead local store warning
113                 id = "<Bogus-id:" + (bogusIdCount) + ">";
114                 container = null;
115             } else {
116                 // The container just contained a forward reference to this message, so let's
117                 // fill in the threadable field of the container with this message
118                 container.threadable = threadable;
119             }
120         }
121 
122         // No container exists for that message Id. Create one and insert it into the hash table.
123         if (container == null) {
124             container = new ThreadContainer();
125             container.threadable = threadable;
126             idTable.put(id, container);
127         }
128 
129         // Iterate through all of the references and create ThreadContainers for any references that
130         // don't have them.
131         ThreadContainer parentRef = null;
132         {
133             String[] references = threadable.messageThreadReferences();
134             for (String refString : references)
135             {
136                 ThreadContainer ref = idTable.get(refString);
137 
138                 // if this id doesnt have a container, create one
139                 if (ref == null) {
140                     ref = new ThreadContainer();
141                     idTable.put(refString, ref);
142                 }
143 
144                 // Link references together in the order they appear in the References: header,
145                 // IF they dont have a have a parent already &&
146                 // IF it will not cause a circular reference
147                 if ((parentRef != null)
148                     && (ref.parent == null)
149                     && (parentRef != ref)
150                     && !(ref.findChild(parentRef))) {
151                     // Link ref into the parent's child list
152                     ref.parent = parentRef;
153                     ref.next = parentRef.child;
154                     parentRef.child = ref;
155                 }
156                 parentRef = ref;
157             }
158         }
159 
160         // parentRef is now set to the container of the last element in the references field. make that
161         // be the parent of this container, unless doing so causes a circular reference
162         if (parentRef != null
163             && (parentRef == container || container.findChild(parentRef)))
164         {
165             parentRef = null;
166         }
167 
168         // if it has a parent already, its because we saw this message in a References: field, and presumed
169         // a parent based on the other entries in that field. Now that we have the actual message, we can
170         // throw away the old parent and use this new one
171         if (container.parent != null) {
172             ThreadContainer rest, prev;
173 
174             for (prev = null, rest = container.parent.child;
175                 rest != null;
176                 prev = rest, rest = rest.next) {
177                 if (rest == container) {
178                     break;
179                 }
180             }
181 
182             if (rest == null) {
183                 throw new RuntimeException(
184                     "Didnt find "
185                         + container
186                         + " in parent"
187                         + container.parent);
188             }
189 
190             // Unlink this container from the parent's child list
191             if (prev == null) {
192                 container.parent.child = container.next;
193             } else {
194                 prev.next = container.next;
195             }
196 
197             container.next = null;
198             container.parent = null;
199         }
200 
201         // If we have a parent, link container into the parents child list
202         if (parentRef != null) {
203             container.parent = parentRef;
204             container.next = parentRef.child;
205             parentRef.child = container;
206         }
207     }
208 
209     /**
210      * Find the root set of all existing ThreadContainers
211      * @param idTable
212      * @return root the ThreadContainer representing the root node
213      */
214     private ThreadContainer findRootSet(HashMap<String, ThreadContainer> idTable) {
215         ThreadContainer root = new ThreadContainer();
216         Iterator<Map.Entry<String, ThreadContainer>> iter = idTable.entrySet().iterator();
217 
218         while (iter.hasNext()) {
219             Map.Entry<String, ThreadContainer> entry = iter.next();
220             ThreadContainer c = entry.getValue();
221             if (c.parent == null) {
222                 if (c.next != null) {
223                     throw new RuntimeException(
224                             "c.next is " + c.next.toString());
225                 }
226                 c.next = root.child;
227                 root.child = c;
228             }
229         }
230         return root;
231     }
232 
233     /**
234      * Delete any empty or dummy ThreadContainers
235      * @param parent
236      */
237     private void pruneEmptyContainers(ThreadContainer parent) {
238         ThreadContainer container, prev, next;
239         for (prev = null, container = parent.child, next = container.next;
240             container != null;
241             prev = container,
242                 container = next,
243                 next = (container == null ? null : container.next)) {
244 
245             // Is it empty and without any children? If so,delete it
246             if (container.threadable == null && container.child == null) {
247                 if (prev == null) {
248                     parent.child = container.next;
249                 } else {
250                     prev.next = container.next;
251                 }
252 
253                 // Set container to prev so that prev keeps its same value the next time through the loop
254                 container = prev;
255             }
256 
257             // Else if empty, with kids, and (not at root or only one kid)
258             else if (
259                 container.threadable == null
260                     && container.child != null
261                     && (container.parent != null
262                         || container.child.next == null)) {
263                 // We have an invalid/expired message with kids. Promote the kids to this level.
264                 ThreadContainer tail;
265                 ThreadContainer kids = container.child;
266 
267                 // Remove this container and replace with 'kids'.
268                 if (prev == null) {
269                     parent.child = kids;
270                 } else {
271                     prev.next = kids;
272                 }
273 
274                 // Make each child's parent be this level's parent -> i.e. promote the children.
275                 // Make the last child's next point to this container's next
276                 // i.e. splice kids into the list in place of container
277                 for (tail = kids; tail.next != null; tail = tail.next) {
278                     tail.parent = container.parent;
279                 }
280 
281                 tail.parent = container.parent;
282                 tail.next = container.next;
283 
284                 // next currently points to the item after the inserted items in the chain - reset that so we process the newly
285                 // promoted items next time round
286                 next = kids;
287 
288                 // Set container to prev so that prev keeps its same value the next time through the loop
289                 container = prev;
290             } else if (container.child != null) {
291                 // A real message , with kids
292                 // Iterate over the children
293                 pruneEmptyContainers(container);
294             }
295         }
296     }
297 
298     /**
299      *  If any two members of the root set have the same subject, merge them.
300      *  This is to attempt to accomodate messages without References: headers.
301      * @param root
302      */
303     private void gatherSubjects(ThreadContainer root) {
304 
305         int count = 0;
306 
307         for (ThreadContainer c = root.child; c != null; c = c.next) {
308             count++;
309         }
310 
311         // TODO verify this will avoid rehashing
312         HashMap<String, ThreadContainer> subjectTable = new HashMap<String, ThreadContainer>((int) (count * 1.2), (float) 0.9);
313         count = 0;
314 
315         for (ThreadContainer c = root.child; c != null; c = c.next) {
316             Threadable threadable = c.threadable;
317 
318             // No threadable? If so, it is a dummy node in the root set.
319             // Only root set members may be dummies, and they alway have at least 2 kids
320             // Take the first kid as representative of the subject
321             if (threadable == null) {
322                 threadable = c.child.threadable;
323             }
324 
325             String subj = threadable.simplifiedSubject();
326 
327             if (subj == null || subj.length() == 0) {
328                 continue;
329             }
330 
331             ThreadContainer old = subjectTable.get(subj);
332 
333             // Add this container to the table iff:
334             // - There exists no container with this subject
335             // - or this is a dummy container and the old one is not - the dummy one is
336             // more interesting as a root, so put it in the table instead
337             // - The container in the table has a "Re:" version of this subject, and
338             // this container has a non-"Re:" version of this subject. The non-"Re:" version
339             // is the more interesting of the two.
340             if (old == null
341                 || (c.threadable == null && old.threadable != null)
342                 || (old.threadable != null
343                     && old.threadable.subjectIsReply()
344                     && c.threadable != null
345                     && !c.threadable.subjectIsReply())) {
346                 subjectTable.put(subj, c);
347                 count++;
348             }
349         }
350 
351         // If the table is empty, we're done
352         if (count == 0) {
353             return;
354         }
355 
356         // subjectTable is now populated with one entry for each subject which occurs in the
357         // root set. Iterate over the root set, and gather together the difference.
358         ThreadContainer prev, c, rest;
359         for (prev = null, c = root.child, rest = c.next;
360             c != null;
361             prev = c, c = rest, rest = (rest == null ? null : rest.next)) {
362             Threadable threadable = c.threadable;
363 
364             // is it a dummy node?
365             if (threadable == null) {
366                 threadable = c.child.threadable;
367             }
368 
369             String subj = threadable.simplifiedSubject();
370 
371             // Dont thread together all subjectless messages
372             if (subj == null || subj.length() == 0) {
373                 continue;
374             }
375 
376             ThreadContainer old = subjectTable.get(subj);
377 
378             if (old == c) { // That's us
379                 continue;
380             }
381 
382             // We have now found another container in the root set with the same subject
383             // Remove the "second" message from the root set
384             if (prev == null) {
385                 root.child = c.next;
386             } else {
387                 prev.next = c.next;
388             }
389             c.next = null;
390 
391             if (old.threadable == null && c.threadable == null) {
392                 // both dummies - merge them
393                 ThreadContainer tail;
394                 for (tail = old.child;
395                     tail != null && tail.next != null;
396                     tail = tail.next) {
397                     // do nothing
398                 }
399 
400                 if (tail != null) { // protect against possible NPE
401                     tail.next = c.child;
402                 }
403 
404                 for (tail = c.child; tail != null; tail = tail.next) {
405                     tail.parent = old;
406                 }
407 
408                 c.child = null;
409             } else if (
410                 old.threadable == null
411                     || (c.threadable != null
412                         && c.threadable.subjectIsReply()
413                         && !old.threadable.subjectIsReply())) {
414                 // Else if old is empty, or c has "Re:" and old does not  ==> make this message a child of old
415                 c.parent = old;
416                 c.next = old.child;
417                 old.child = c;
418             } else {
419                 // else make the old and new messages be children of a new dummy container.
420                 // We create a new container object for old.msg and empty the old container
421                 ThreadContainer newc = new ThreadContainer();
422                 newc.threadable = old.threadable;
423                 newc.child = old.child;
424 
425                 for (ThreadContainer tail = newc.child;
426                     tail != null;
427                     tail = tail.next)
428                 {
429                     tail.parent = newc;
430                 }
431 
432                 old.threadable = null;
433                 old.child = null;
434 
435                 c.parent = old;
436                 newc.parent = old;
437 
438                 // Old is now a dummy- give it 2 kids , c and newc
439                 old.child = c;
440                 c.next = newc;
441             }
442             // We've done a merge, so keep the same prev
443             c = prev;
444         }
445 
446         subjectTable.clear();
447         subjectTable = null;
448 
449     }
450 
451 
452     // DEPRECATED METHODS - for API compatibility only - DO NOT USE
453 
454     /**
455      * The client passes in an array of Threadable objects, and
456      * the Threader constructs a connected 'graph' of messages
457      * @param messages array of messages to thread, must not be empty
458      * @return null if messages == null or root.child == null or messages array is empty
459      * @deprecated (2.2) prefer {@link #thread(List)}
460      */
461     @Deprecated
462     public Threadable thread(Threadable[] messages) {
463         if (messages == null) {
464             return null;
465         }
466         return thread(Arrays.asList(messages));
467     }
468 
469 }