001 /*
002 * Copyright 2001-2005 The Apache Software Foundation
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 * http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017 package org.apache.commons.net.nntp;
018
019 /**
020 * This is an implementation of a message threading algorithm, as originally devised by Zamie Zawinski.
021 * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details.
022 * For his Java implementation, see <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java">http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a>
023 *
024 * @author rwinston <rwinston@checkfree.com>
025 *
026 */
027
028 import java.util.HashMap;
029 import java.util.Iterator;
030
031 public class Threader {
032 private ThreadContainer root;
033 private HashMap idTable;
034 private int bogusIdCount = 0;
035
036 /**
037 * The main threader entry point - The client passes in an array of Threadable objects, and
038 * the Threader constructs a connected 'graph' of messages
039 * @param messages
040 * @return
041 */
042 public Threadable thread(Threadable[] messages) {
043 if (messages == null)
044 return null;
045
046 idTable = new HashMap();
047
048 // walk through each Threadable element
049 for (int i = 0; i < messages.length; ++i) {
050 if (!messages[i].isDummy())
051 buildContainer(messages[i]);
052 }
053
054 root = findRootSet();
055 idTable.clear();
056 idTable = null;
057
058 pruneEmptyContainers(root);
059
060 root.reverseChildren();
061 gatherSubjects();
062
063 if (root.next != null)
064 throw new RuntimeException("root node has a next:" + root);
065
066 for (ThreadContainer r = root.child; r != null; r = r.next) {
067 if (r.threadable == null)
068 r.threadable = r.child.threadable.makeDummy();
069 }
070
071 Threadable result = (root.child == null ? null : root.child.threadable);
072 root.flush();
073 root = null;
074
075 return result;
076 }
077
078 /**
079 *
080 * @param threadable
081 */
082 private void buildContainer(Threadable threadable) {
083 String id = threadable.messageThreadId();
084 ThreadContainer container = (ThreadContainer) idTable.get(id);
085
086 // A ThreadContainer exists for this id already. This should be a forward reference, but may
087 // be a duplicate id, in which case we will need to generate a bogus placeholder id
088 if (container != null) {
089 if (container.threadable != null) { // oops! duplicate ids...
090 id = "<Bogus-id:" + (bogusIdCount++) + ">";
091 container = null;
092 } else {
093 // The container just contained a forward reference to this message, so let's
094 // fill in the threadable field of the container with this message
095 container.threadable = threadable;
096 }
097 }
098
099 // No container exists for that message Id. Create one and insert it into the hash table.
100 if (container == null) {
101 container = new ThreadContainer();
102 container.threadable = threadable;
103 idTable.put(id, container);
104 }
105
106 // Iterate through all of the references and create ThreadContainers for any references that
107 // don't have them.
108 ThreadContainer parentRef = null;
109 {
110 String[] references = threadable.messageThreadReferences();
111 for (int i = 0; i < references.length; ++i) {
112 String refString = references[i];
113 ThreadContainer ref = (ThreadContainer) idTable.get(refString);
114
115 // if this id doesnt have a container, create one
116 if (ref == null) {
117 ref = new ThreadContainer();
118 idTable.put(refString, ref);
119 }
120
121 // Link references together in the order they appear in the References: header,
122 // IF they dont have a have a parent already &&
123 // IF it will not cause a circular reference
124 if ((parentRef != null)
125 && (ref.parent == null)
126 && (parentRef != ref)
127 && !(parentRef.findChild(ref))) {
128 // Link ref into the parent's child list
129 ref.parent = parentRef;
130 ref.next = parentRef.child;
131 parentRef.child = ref;
132 }
133 parentRef = ref;
134 }
135 }
136
137 // parentRef is now set to the container of the last element in the references field. make that
138 // be the parent of this container, unless doing so causes a circular reference
139 if (parentRef != null
140 && (parentRef == container || container.findChild(parentRef)))
141 parentRef = null;
142
143 // if it has a parent already, its because we saw this message in a References: field, and presumed
144 // a parent based on the other entries in that field. Now that we have the actual message, we can
145 // throw away the old parent and use this new one
146 if (container.parent != null) {
147 ThreadContainer rest, prev;
148
149 for (prev = null, rest = container.parent.child;
150 rest != null;
151 prev = rest, rest = rest.next) {
152 if (rest == container)
153 break;
154 }
155
156 if (rest == null) {
157 throw new RuntimeException(
158 "Didnt find "
159 + container
160 + " in parent"
161 + container.parent);
162 }
163
164 // Unlink this container from the parent's child list
165 if (prev == null)
166 container.parent.child = container.next;
167 else
168 prev.next = container.next;
169
170 container.next = null;
171 container.parent = null;
172 }
173
174 // If we have a parent, link container into the parents child list
175 if (parentRef != null) {
176 container.parent = parentRef;
177 container.next = parentRef.child;
178 parentRef.child = container;
179 }
180 }
181
182 /**
183 * Find the root set of all existing ThreadContainers
184 * @return root the ThreadContainer representing the root node
185 */
186 private ThreadContainer findRootSet() {
187 ThreadContainer root = new ThreadContainer();
188 Iterator iter = idTable.keySet().iterator();
189
190 while (iter.hasNext()) {
191 Object key = iter.next();
192 ThreadContainer c = (ThreadContainer) idTable.get(key);
193 if (c.parent == null) {
194 if (c.next != null)
195 throw new RuntimeException(
196 "c.next is " + c.next.toString());
197 c.next = root.child;
198 root.child = c;
199 }
200 }
201 return root;
202 }
203
204 /**
205 * Delete any empty or dummy ThreadContainers
206 * @param parent
207 */
208 private void pruneEmptyContainers(ThreadContainer parent) {
209 ThreadContainer container, prev, next;
210 for (prev = null, container = parent.child, next = container.next;
211 container != null;
212 prev = container,
213 container = next,
214 next = (container == null ? null : container.next)) {
215
216 // Is it empty and without any children? If so,delete it
217 if (container.threadable == null && container.child == null) {
218 if (prev == null)
219 parent.child = container.next;
220 else
221 prev.next = container.next;
222
223 // Set container to prev so that prev keeps its same value the next time through the loop
224 container = prev;
225 }
226
227 // Else if empty, with kids, and (not at root or only one kid)
228 else if (
229 container.threadable == null
230 && container.child != null
231 && (container.parent != null
232 || container.child.next == null)) {
233 // We have an invalid/expired message with kids. Promote the kids to this level.
234 ThreadContainer tail;
235 ThreadContainer kids = container.child;
236
237 // Remove this container and replace with 'kids'.
238 if (prev == null)
239 parent.child = kids;
240 else
241 prev.next = kids;
242
243 // Make each child's parent be this level's parent -> i.e. promote the children. Make the last child's next point to this container's next
244 // i.e. splice kids into the list in place of container
245 for (tail = kids; tail.next != null; tail = tail.next)
246 tail.parent = container.parent;
247
248 tail.parent = container.parent;
249 tail.next = container.next;
250
251 // next currently points to the item after the inserted items in the chain - reset that so we process the newly
252 // promoted items next time round
253 next = kids;
254
255 // Set container to prev so that prev keeps its same value the next time through the loop
256 container = prev;
257 } else if (container.child != null) {
258 // A real message , with kids
259 // Iterate over the children
260 pruneEmptyContainers(container);
261 }
262 }
263 }
264
265 /**
266 * If any two members of the root set have the same subject, merge them. This is to attempt to accomodate messages without References: headers.
267 */
268 private void gatherSubjects() {
269
270 int count = 0;
271
272 for (ThreadContainer c = root.child; c != null; c = c.next)
273 count++;
274
275 // TODO verify this will avoid rehashing
276 HashMap subjectTable = new HashMap((int) (count * 1.2), (float) 0.9);
277 count = 0;
278
279 for (ThreadContainer c = root.child; c != null; c = c.next) {
280 Threadable threadable = c.threadable;
281
282 // No threadable? If so, it is a dummy node in the root set.
283 // Only root set members may be dummies, and they alway have at least 2 kids
284 // Take the first kid as representative of the subject
285 if (threadable == null)
286 threadable = c.child.threadable;
287
288 String subj = threadable.simplifiedSubject();
289
290 if (subj == null || subj == "")
291 continue;
292
293 ThreadContainer old = (ThreadContainer) subjectTable.get(subj);
294
295 // Add this container to the table iff:
296 // - There exists no container with this subject
297 // - or this is a dummy container and the old one is not - the dummy one is
298 // more interesting as a root, so put it in the table instead
299 // - The container in the table has a "Re:" version of this subject, and
300 // this container has a non-"Re:" version of this subject. The non-"Re:" version
301 // is the more interesting of the two.
302 if (old == null
303 || (c.threadable == null && old.threadable != null)
304 || (old.threadable != null
305 && old.threadable.subjectIsReply()
306 && c.threadable != null
307 && !c.threadable.subjectIsReply())) {
308 subjectTable.put(subj, c);
309 count++;
310 }
311 }
312
313 // If the table is empty, we're done
314 if (count == 0)
315 return;
316
317 // subjectTable is now populated with one entry for each subject which occurs in the
318 // root set. Iterate over the root set, and gather together the difference.
319 ThreadContainer prev, c, rest;
320 for (prev = null, c = root.child, rest = c.next;
321 c != null;
322 prev = c, c = rest, rest = (rest == null ? null : rest.next)) {
323 Threadable threadable = c.threadable;
324
325 // is it a dummy node?
326 if (threadable == null)
327 threadable = c.child.threadable;
328
329 String subj = threadable.simplifiedSubject();
330
331 // Dont thread together all subjectless messages
332 if (subj == null || subj == "")
333 continue;
334
335 ThreadContainer old = (ThreadContainer) subjectTable.get(subj);
336
337 if (old == c) // That's us
338 continue;
339
340 // We have now found another container in the root set with the same subject
341 // Remove the "second" message from the root set
342 if (prev == null)
343 root.child = c.next;
344 else
345 prev.next = c.next;
346 c.next = null;
347
348 if (old.threadable == null && c.threadable == null) {
349 // both dummies - merge them
350 ThreadContainer tail;
351 for (tail = old.child;
352 tail != null && tail.next != null;
353 tail = tail.next);
354
355 tail.next = c.child;
356
357 for (tail = c.child; tail != null; tail = tail.next)
358 tail.parent = old;
359
360 c.child = null;
361 } else if (
362 old.threadable == null
363 || (c.threadable != null
364 && c.threadable.subjectIsReply()
365 && !old.threadable.subjectIsReply())) {
366 // Else if old is empty, or c has "Re:" and old does not ==> make this message a child of old
367 c.parent = old;
368 c.next = old.child;
369 old.child = c;
370 } else {
371 // else make the old and new messages be children of a new dummy container.
372 // We create a new container object for old.msg and empty the old container
373 ThreadContainer newc = new ThreadContainer();
374 newc.threadable = old.threadable;
375 newc.child = old.child;
376
377 for (ThreadContainer tail = newc.child;
378 tail != null;
379 tail = tail.next)
380 tail.parent = newc;
381
382 old.threadable = null;
383 old.child = null;
384
385 c.parent = old;
386 newc.parent = old;
387
388 // Old is now a dummy- give it 2 kids , c and newc
389 old.child = c;
390 c.next = newc;
391 }
392 // We've done a merge, so keep the same prev
393 c = prev;
394 }
395
396 subjectTable.clear();
397 subjectTable = null;
398
399 }
400 }
401
402 /**
403 * A placeholder utility class, used for constructing a tree of Threadables
404 * Originall implementation by Jamie Zawinski.
405 * See the Grendel source for more details <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java#511">here</a>
406 * Threadable objects
407 * @author Rory Winston <rwinston@checkfree.com>
408 */
409 class ThreadContainer {
410 Threadable threadable;
411 ThreadContainer parent;
412 ThreadContainer prev;
413 ThreadContainer next;
414 ThreadContainer child;
415
416 /**
417 *
418 * @param container
419 * @return true if child is under self's tree. Detects circular references
420 */
421 boolean findChild(ThreadContainer target) {
422 if (child == null)
423 return false;
424
425 else if (child == target)
426 return true;
427 else
428 return child.findChild(target);
429 }
430
431 // Copy the ThreadContainer tree structure down into the underlying Threadable objects
432 // (Make the Threadable tree look like the ThreadContainer tree)
433 void flush() {
434 if (parent != null && threadable == null)
435 throw new RuntimeException("no threadable in " + this.toString());
436
437 parent = null;
438
439 if (threadable != null)
440 threadable.setChild(child == null ? null : child.threadable);
441
442 if (child != null) {
443 child.flush();
444 child = null;
445 }
446
447 if (threadable != null)
448 threadable.setNext(next == null ? null : next.threadable);
449
450 if (next != null) {
451 next.flush();
452 next = null;
453 }
454
455 threadable = null;
456 }
457
458 /**
459 * Reverse the entire set of children
460 *
461 */
462 void reverseChildren() {
463 if (child != null) {
464 ThreadContainer kid, prev, rest;
465 for (prev = null, kid = child, rest = kid.next;
466 kid != null;
467 prev = kid,
468 kid = rest,
469 rest = (rest == null ? null : rest.next))
470 kid.next = prev;
471
472 child = prev;
473
474 // Do it for the kids
475 for (kid = child; kid != null; kid = kid.next)
476 kid.reverseChildren();
477 }
478 }
479 }