View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  
19  package org.apache.commons.net.nntp;
20  
21  /**
22   * This is an implementation of a message threading algorithm, as originally devised by Zamie Zawinski.
23   * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details.
24   * For his Java implementation, see <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java">http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a>
25   *
26   * @author rwinston <rwinston@checkfree.com>
27   *
28   */
29  
30  import java.util.HashMap;
31  import java.util.Iterator;
32  import java.util.List;
33  
34  public class Threader {
35      private ThreadContainer root;
36      private HashMap<String,ThreadContainer> idTable;
37      private int bogusIdCount = 0;
38  
39      /**
40       * The client passes in a list of Threadable objects, and
41       * the Threader constructs a connected 'graph' of messages
42       * @param messages list of messages to thread
43       * @return null if messages == null or root.child == null
44       * @since 2.2
45       */
46      public Threadable thread(List<? extends Threadable> messages) {
47          return thread((Iterable<? extends Threadable>)messages);
48      }
49  
50      /**
51       * The client passes in a list of Iterable objects, and
52       * the Threader constructs a connected 'graph' of messages
53       * @param messages iterable of messages to thread
54       * @return null if messages == null or root.child == null
55       * @since 3.0
56       */
57      public Threadable thread(Iterable<? extends Threadable> messages) {
58          if (messages == null) {
59              return null;
60          }
61  
62          idTable = new HashMap<String,ThreadContainer>();
63  
64          // walk through each Threadable element
65          for (Threadable t : messages) {
66              if (!t.isDummy()) {
67                  buildContainer(t);
68              }
69          }
70  
71          root = findRootSet();
72          idTable.clear();
73          idTable = null;
74  
75          pruneEmptyContainers(root);
76  
77          root.reverseChildren();
78          gatherSubjects();
79  
80          if (root.next != null) {
81              throw new RuntimeException("root node has a next:" + root);
82          }
83  
84          for (ThreadContainer r = root.child; r != null; r = r.next) {
85              if (r.threadable == null) {
86                  r.threadable = r.child.threadable.makeDummy();
87              }
88          }
89  
90          Threadable result = (root.child == null ? null : root.child.threadable);
91          root.flush();
92          root = null;
93  
94          return result;
95      }
96  
97      /**
98       *
99       * @param threadable
100      */
101     private void buildContainer(Threadable threadable) {
102         String id = threadable.messageThreadId();
103         ThreadContainer container = idTable.get(id);
104 
105         // A ThreadContainer exists for this id already. This should be a forward reference, but may
106         // be a duplicate id, in which case we will need to generate a bogus placeholder id
107         if (container != null) {
108             if (container.threadable != null) { // oops! duplicate ids...
109                 id = "<Bogus-id:" + (bogusIdCount++) + ">";
110                 container = null;
111             } else {
112                 // The container just contained a forward reference to this message, so let's
113                 // fill in the threadable field of the container with this message
114                 container.threadable = threadable;
115             }
116         }
117 
118         // No container exists for that message Id. Create one and insert it into the hash table.
119         if (container == null) {
120             container = new ThreadContainer();
121             container.threadable = threadable;
122             idTable.put(id, container);
123         }
124 
125         // Iterate through all of the references and create ThreadContainers for any references that
126         // don't have them.
127         ThreadContainer parentRef = null;
128         {
129             String[] references = threadable.messageThreadReferences();
130             for (String refString : references)
131             {
132                 ThreadContainer ref = idTable.get(refString);
133 
134                 // if this id doesnt have a container, create one
135                 if (ref == null) {
136                     ref = new ThreadContainer();
137                     idTable.put(refString, ref);
138                 }
139 
140                 // Link references together in the order they appear in the References: header,
141                 // IF they dont have a have a parent already &&
142                 // IF it will not cause a circular reference
143                 if ((parentRef != null)
144                     && (ref.parent == null)
145                     && (parentRef != ref)
146                     && !(ref.findChild(parentRef))) {
147                     // Link ref into the parent's child list
148                     ref.parent = parentRef;
149                     ref.next = parentRef.child;
150                     parentRef.child = ref;
151                 }
152                 parentRef = ref;
153             }
154         }
155 
156         // parentRef is now set to the container of the last element in the references field. make that
157         // be the parent of this container, unless doing so causes a circular reference
158         if (parentRef != null
159             && (parentRef == container || container.findChild(parentRef)))
160         {
161             parentRef = null;
162         }
163 
164         // if it has a parent already, its because we saw this message in a References: field, and presumed
165         // a parent based on the other entries in that field. Now that we have the actual message, we can
166         // throw away the old parent and use this new one
167         if (container.parent != null) {
168             ThreadContainer rest, prev;
169 
170             for (prev = null, rest = container.parent.child;
171                 rest != null;
172                 prev = rest, rest = rest.next) {
173                 if (rest == container) {
174                     break;
175                 }
176             }
177 
178             if (rest == null) {
179                 throw new RuntimeException(
180                     "Didnt find "
181                         + container
182                         + " in parent"
183                         + container.parent);
184             }
185 
186             // Unlink this container from the parent's child list
187             if (prev == null) {
188                 container.parent.child = container.next;
189             } else {
190                 prev.next = container.next;
191             }
192 
193             container.next = null;
194             container.parent = null;
195         }
196 
197         // If we have a parent, link container into the parents child list
198         if (parentRef != null) {
199             container.parent = parentRef;
200             container.next = parentRef.child;
201             parentRef.child = container;
202         }
203     }
204 
205     /**
206      * Find the root set of all existing ThreadContainers
207      * @return root the ThreadContainer representing the root node
208      */
209     private ThreadContainer findRootSet() {
210         ThreadContainer root = new ThreadContainer();
211         Iterator<String> iter = idTable.keySet().iterator();
212 
213         while (iter.hasNext()) {
214             Object key = iter.next();
215             ThreadContainer c = idTable.get(key);
216             if (c.parent == null) {
217                 if (c.next != null) {
218                     throw new RuntimeException(
219                             "c.next is " + c.next.toString());
220                 }
221                 c.next = root.child;
222                 root.child = c;
223             }
224         }
225         return root;
226     }
227 
228     /**
229      * Delete any empty or dummy ThreadContainers
230      * @param parent
231      */
232     private void pruneEmptyContainers(ThreadContainer parent) {
233         ThreadContainer container, prev, next;
234         for (prev = null, container = parent.child, next = container.next;
235             container != null;
236             prev = container,
237                 container = next,
238                 next = (container == null ? null : container.next)) {
239 
240             // Is it empty and without any children? If so,delete it
241             if (container.threadable == null && container.child == null) {
242                 if (prev == null) {
243                     parent.child = container.next;
244                 } else {
245                     prev.next = container.next;
246                 }
247 
248                 // Set container to prev so that prev keeps its same value the next time through the loop
249                 container = prev;
250             }
251 
252             // Else if empty, with kids, and (not at root or only one kid)
253             else if (
254                 container.threadable == null
255                     && container.child != null
256                     && (container.parent != null
257                         || container.child.next == null)) {
258                 // We have an invalid/expired message with kids. Promote the kids to this level.
259                 ThreadContainer tail;
260                 ThreadContainer kids = container.child;
261 
262                 // Remove this container and replace with 'kids'.
263                 if (prev == null) {
264                     parent.child = kids;
265                 } else {
266                     prev.next = kids;
267                 }
268 
269                 // Make each child's parent be this level's parent -> i.e. promote the children. Make the last child's next point to this container's next
270                 // i.e. splice kids into the list in place of container
271                 for (tail = kids; tail.next != null; tail = tail.next) {
272                     tail.parent = container.parent;
273                 }
274 
275                 tail.parent = container.parent;
276                 tail.next = container.next;
277 
278                 // next currently points to the item after the inserted items in the chain - reset that so we process the newly
279                 // promoted items next time round
280                 next = kids;
281 
282                 // Set container to prev so that prev keeps its same value the next time through the loop
283                 container = prev;
284             } else if (container.child != null) {
285                 // A real message , with kids
286                 // Iterate over the children
287                 pruneEmptyContainers(container);
288             }
289         }
290     }
291 
292     /**
293      *  If any two members of the root set have the same subject, merge them. This is to attempt to accomodate messages without References: headers.
294      */
295     private void gatherSubjects() {
296 
297         int count = 0;
298 
299         for (ThreadContainer c = root.child; c != null; c = c.next) {
300             count++;
301         }
302 
303         // TODO verify this will avoid rehashing
304         HashMap<String, ThreadContainer> subjectTable = new HashMap<String, ThreadContainer>((int) (count * 1.2), (float) 0.9);
305         count = 0;
306 
307         for (ThreadContainer c = root.child; c != null; c = c.next) {
308             Threadable threadable = c.threadable;
309 
310             // No threadable? If so, it is a dummy node in the root set.
311             // Only root set members may be dummies, and they alway have at least 2 kids
312             // Take the first kid as representative of the subject
313             if (threadable == null) {
314                 threadable = c.child.threadable;
315             }
316 
317             String subj = threadable.simplifiedSubject();
318 
319             if (subj == null || subj.length() == 0) {
320                 continue;
321             }
322 
323             ThreadContainer old = subjectTable.get(subj);
324 
325             // Add this container to the table iff:
326             // - There exists no container with this subject
327             // - or this is a dummy container and the old one is not - the dummy one is
328             // more interesting as a root, so put it in the table instead
329             // - The container in the table has a "Re:" version of this subject, and
330             // this container has a non-"Re:" version of this subject. The non-"Re:" version
331             // is the more interesting of the two.
332             if (old == null
333                 || (c.threadable == null && old.threadable != null)
334                 || (old.threadable != null
335                     && old.threadable.subjectIsReply()
336                     && c.threadable != null
337                     && !c.threadable.subjectIsReply())) {
338                 subjectTable.put(subj, c);
339                 count++;
340             }
341         }
342 
343         // If the table is empty, we're done
344         if (count == 0) {
345             return;
346         }
347 
348         // subjectTable is now populated with one entry for each subject which occurs in the
349         // root set. Iterate over the root set, and gather together the difference.
350         ThreadContainer prev, c, rest;
351         for (prev = null, c = root.child, rest = c.next;
352             c != null;
353             prev = c, c = rest, rest = (rest == null ? null : rest.next)) {
354             Threadable threadable = c.threadable;
355 
356             // is it a dummy node?
357             if (threadable == null) {
358                 threadable = c.child.threadable;
359             }
360 
361             String subj = threadable.simplifiedSubject();
362 
363             // Dont thread together all subjectless messages
364             if (subj == null || subj.length() == 0) {
365                 continue;
366             }
367 
368             ThreadContainer old = subjectTable.get(subj);
369 
370             if (old == c) { // That's us
371                 continue;
372             }
373 
374             // We have now found another container in the root set with the same subject
375             // Remove the "second" message from the root set
376             if (prev == null) {
377                 root.child = c.next;
378             } else {
379                 prev.next = c.next;
380             }
381             c.next = null;
382 
383             if (old.threadable == null && c.threadable == null) {
384                 // both dummies - merge them
385                 ThreadContainer tail;
386                 for (tail = old.child;
387                     tail != null && tail.next != null;
388                     tail = tail.next) {
389                     // do nothing
390                 }
391 
392                 if (tail != null) { // protect against possible NPE
393                     tail.next = c.child;
394                 }
395 
396                 for (tail = c.child; tail != null; tail = tail.next) {
397                     tail.parent = old;
398                 }
399 
400                 c.child = null;
401             } else if (
402                 old.threadable == null
403                     || (c.threadable != null
404                         && c.threadable.subjectIsReply()
405                         && !old.threadable.subjectIsReply())) {
406                 // Else if old is empty, or c has "Re:" and old does not  ==> make this message a child of old
407                 c.parent = old;
408                 c.next = old.child;
409                 old.child = c;
410             } else {
411                 // else make the old and new messages be children of a new dummy container.
412                 // We create a new container object for old.msg and empty the old container
413                 ThreadContainer newc = new ThreadContainer();
414                 newc.threadable = old.threadable;
415                 newc.child = old.child;
416 
417                 for (ThreadContainer tail = newc.child;
418                     tail != null;
419                     tail = tail.next)
420                 {
421                     tail.parent = newc;
422                 }
423 
424                 old.threadable = null;
425                 old.child = null;
426 
427                 c.parent = old;
428                 newc.parent = old;
429 
430                 // Old is now a dummy- give it 2 kids , c and newc
431                 old.child = c;
432                 c.next = newc;
433             }
434             // We've done a merge, so keep the same prev
435             c = prev;
436         }
437 
438         subjectTable.clear();
439         subjectTable = null;
440 
441     }
442 
443 
444     // DEPRECATED METHODS - for API compatibility only - DO NOT USE
445 
446     /**
447      * The client passes in an array of Threadable objects, and
448      * the Threader constructs a connected 'graph' of messages
449      * @param messages array of messages to thread
450      * @return null if messages == null or root.child == null
451      * @deprecated (2.2) prefer {@link #thread(List)}
452      */
453     @Deprecated
454     public Threadable thread(Threadable[] messages) {
455         return thread(java.util.Arrays.asList(messages));
456     }
457 
458 }