001 /* 002 * Copyright 2001-2005 The Apache Software Foundation 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017 package org.apache.commons.net.nntp; 018 019 /** 020 * This is an implementation of a message threading algorithm, as originally devised by Zamie Zawinski. 021 * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details. 022 * For his Java implementation, see <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java">http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a> 023 * 024 * @author rwinston <rwinston@checkfree.com> 025 * 026 */ 027 028 import java.util.HashMap; 029 import java.util.Iterator; 030 031 public class Threader { 032 private ThreadContainer root; 033 private HashMap idTable; 034 private int bogusIdCount = 0; 035 036 /** 037 * The main threader entry point - The client passes in an array of Threadable objects, and 038 * the Threader constructs a connected 'graph' of messages 039 * @param messages 040 * @return 041 */ 042 public Threadable thread(Threadable[] messages) { 043 if (messages == null) 044 return null; 045 046 idTable = new HashMap(); 047 048 // walk through each Threadable element 049 for (int i = 0; i < messages.length; ++i) { 050 if (!messages[i].isDummy()) 051 buildContainer(messages[i]); 052 } 053 054 root = findRootSet(); 055 idTable.clear(); 056 idTable = null; 057 058 pruneEmptyContainers(root); 059 060 root.reverseChildren(); 061 gatherSubjects(); 062 063 if (root.next != null) 064 throw new RuntimeException("root node has a next:" + root); 065 066 for (ThreadContainer r = root.child; r != null; r = r.next) { 067 if (r.threadable == null) 068 r.threadable = r.child.threadable.makeDummy(); 069 } 070 071 Threadable result = (root.child == null ? null : root.child.threadable); 072 root.flush(); 073 root = null; 074 075 return result; 076 } 077 078 /** 079 * 080 * @param threadable 081 */ 082 private void buildContainer(Threadable threadable) { 083 String id = threadable.messageThreadId(); 084 ThreadContainer container = (ThreadContainer) idTable.get(id); 085 086 // A ThreadContainer exists for this id already. This should be a forward reference, but may 087 // be a duplicate id, in which case we will need to generate a bogus placeholder id 088 if (container != null) { 089 if (container.threadable != null) { // oops! duplicate ids... 090 id = "<Bogus-id:" + (bogusIdCount++) + ">"; 091 container = null; 092 } else { 093 // The container just contained a forward reference to this message, so let's 094 // fill in the threadable field of the container with this message 095 container.threadable = threadable; 096 } 097 } 098 099 // No container exists for that message Id. Create one and insert it into the hash table. 100 if (container == null) { 101 container = new ThreadContainer(); 102 container.threadable = threadable; 103 idTable.put(id, container); 104 } 105 106 // Iterate through all of the references and create ThreadContainers for any references that 107 // don't have them. 108 ThreadContainer parentRef = null; 109 { 110 String[] references = threadable.messageThreadReferences(); 111 for (int i = 0; i < references.length; ++i) { 112 String refString = references[i]; 113 ThreadContainer ref = (ThreadContainer) idTable.get(refString); 114 115 // if this id doesnt have a container, create one 116 if (ref == null) { 117 ref = new ThreadContainer(); 118 idTable.put(refString, ref); 119 } 120 121 // Link references together in the order they appear in the References: header, 122 // IF they dont have a have a parent already && 123 // IF it will not cause a circular reference 124 if ((parentRef != null) 125 && (ref.parent == null) 126 && (parentRef != ref) 127 && !(parentRef.findChild(ref))) { 128 // Link ref into the parent's child list 129 ref.parent = parentRef; 130 ref.next = parentRef.child; 131 parentRef.child = ref; 132 } 133 parentRef = ref; 134 } 135 } 136 137 // parentRef is now set to the container of the last element in the references field. make that 138 // be the parent of this container, unless doing so causes a circular reference 139 if (parentRef != null 140 && (parentRef == container || container.findChild(parentRef))) 141 parentRef = null; 142 143 // if it has a parent already, its because we saw this message in a References: field, and presumed 144 // a parent based on the other entries in that field. Now that we have the actual message, we can 145 // throw away the old parent and use this new one 146 if (container.parent != null) { 147 ThreadContainer rest, prev; 148 149 for (prev = null, rest = container.parent.child; 150 rest != null; 151 prev = rest, rest = rest.next) { 152 if (rest == container) 153 break; 154 } 155 156 if (rest == null) { 157 throw new RuntimeException( 158 "Didnt find " 159 + container 160 + " in parent" 161 + container.parent); 162 } 163 164 // Unlink this container from the parent's child list 165 if (prev == null) 166 container.parent.child = container.next; 167 else 168 prev.next = container.next; 169 170 container.next = null; 171 container.parent = null; 172 } 173 174 // If we have a parent, link container into the parents child list 175 if (parentRef != null) { 176 container.parent = parentRef; 177 container.next = parentRef.child; 178 parentRef.child = container; 179 } 180 } 181 182 /** 183 * Find the root set of all existing ThreadContainers 184 * @return root the ThreadContainer representing the root node 185 */ 186 private ThreadContainer findRootSet() { 187 ThreadContainer root = new ThreadContainer(); 188 Iterator iter = idTable.keySet().iterator(); 189 190 while (iter.hasNext()) { 191 Object key = iter.next(); 192 ThreadContainer c = (ThreadContainer) idTable.get(key); 193 if (c.parent == null) { 194 if (c.next != null) 195 throw new RuntimeException( 196 "c.next is " + c.next.toString()); 197 c.next = root.child; 198 root.child = c; 199 } 200 } 201 return root; 202 } 203 204 /** 205 * Delete any empty or dummy ThreadContainers 206 * @param parent 207 */ 208 private void pruneEmptyContainers(ThreadContainer parent) { 209 ThreadContainer container, prev, next; 210 for (prev = null, container = parent.child, next = container.next; 211 container != null; 212 prev = container, 213 container = next, 214 next = (container == null ? null : container.next)) { 215 216 // Is it empty and without any children? If so,delete it 217 if (container.threadable == null && container.child == null) { 218 if (prev == null) 219 parent.child = container.next; 220 else 221 prev.next = container.next; 222 223 // Set container to prev so that prev keeps its same value the next time through the loop 224 container = prev; 225 } 226 227 // Else if empty, with kids, and (not at root or only one kid) 228 else if ( 229 container.threadable == null 230 && container.child != null 231 && (container.parent != null 232 || container.child.next == null)) { 233 // We have an invalid/expired message with kids. Promote the kids to this level. 234 ThreadContainer tail; 235 ThreadContainer kids = container.child; 236 237 // Remove this container and replace with 'kids'. 238 if (prev == null) 239 parent.child = kids; 240 else 241 prev.next = kids; 242 243 // Make each child's parent be this level's parent -> i.e. promote the children. Make the last child's next point to this container's next 244 // i.e. splice kids into the list in place of container 245 for (tail = kids; tail.next != null; tail = tail.next) 246 tail.parent = container.parent; 247 248 tail.parent = container.parent; 249 tail.next = container.next; 250 251 // next currently points to the item after the inserted items in the chain - reset that so we process the newly 252 // promoted items next time round 253 next = kids; 254 255 // Set container to prev so that prev keeps its same value the next time through the loop 256 container = prev; 257 } else if (container.child != null) { 258 // A real message , with kids 259 // Iterate over the children 260 pruneEmptyContainers(container); 261 } 262 } 263 } 264 265 /** 266 * If any two members of the root set have the same subject, merge them. This is to attempt to accomodate messages without References: headers. 267 */ 268 private void gatherSubjects() { 269 270 int count = 0; 271 272 for (ThreadContainer c = root.child; c != null; c = c.next) 273 count++; 274 275 // TODO verify this will avoid rehashing 276 HashMap subjectTable = new HashMap((int) (count * 1.2), (float) 0.9); 277 count = 0; 278 279 for (ThreadContainer c = root.child; c != null; c = c.next) { 280 Threadable threadable = c.threadable; 281 282 // No threadable? If so, it is a dummy node in the root set. 283 // Only root set members may be dummies, and they alway have at least 2 kids 284 // Take the first kid as representative of the subject 285 if (threadable == null) 286 threadable = c.child.threadable; 287 288 String subj = threadable.simplifiedSubject(); 289 290 if (subj == null || subj == "") 291 continue; 292 293 ThreadContainer old = (ThreadContainer) subjectTable.get(subj); 294 295 // Add this container to the table iff: 296 // - There exists no container with this subject 297 // - or this is a dummy container and the old one is not - the dummy one is 298 // more interesting as a root, so put it in the table instead 299 // - The container in the table has a "Re:" version of this subject, and 300 // this container has a non-"Re:" version of this subject. The non-"Re:" version 301 // is the more interesting of the two. 302 if (old == null 303 || (c.threadable == null && old.threadable != null) 304 || (old.threadable != null 305 && old.threadable.subjectIsReply() 306 && c.threadable != null 307 && !c.threadable.subjectIsReply())) { 308 subjectTable.put(subj, c); 309 count++; 310 } 311 } 312 313 // If the table is empty, we're done 314 if (count == 0) 315 return; 316 317 // subjectTable is now populated with one entry for each subject which occurs in the 318 // root set. Iterate over the root set, and gather together the difference. 319 ThreadContainer prev, c, rest; 320 for (prev = null, c = root.child, rest = c.next; 321 c != null; 322 prev = c, c = rest, rest = (rest == null ? null : rest.next)) { 323 Threadable threadable = c.threadable; 324 325 // is it a dummy node? 326 if (threadable == null) 327 threadable = c.child.threadable; 328 329 String subj = threadable.simplifiedSubject(); 330 331 // Dont thread together all subjectless messages 332 if (subj == null || subj == "") 333 continue; 334 335 ThreadContainer old = (ThreadContainer) subjectTable.get(subj); 336 337 if (old == c) // That's us 338 continue; 339 340 // We have now found another container in the root set with the same subject 341 // Remove the "second" message from the root set 342 if (prev == null) 343 root.child = c.next; 344 else 345 prev.next = c.next; 346 c.next = null; 347 348 if (old.threadable == null && c.threadable == null) { 349 // both dummies - merge them 350 ThreadContainer tail; 351 for (tail = old.child; 352 tail != null && tail.next != null; 353 tail = tail.next); 354 355 tail.next = c.child; 356 357 for (tail = c.child; tail != null; tail = tail.next) 358 tail.parent = old; 359 360 c.child = null; 361 } else if ( 362 old.threadable == null 363 || (c.threadable != null 364 && c.threadable.subjectIsReply() 365 && !old.threadable.subjectIsReply())) { 366 // Else if old is empty, or c has "Re:" and old does not ==> make this message a child of old 367 c.parent = old; 368 c.next = old.child; 369 old.child = c; 370 } else { 371 // else make the old and new messages be children of a new dummy container. 372 // We create a new container object for old.msg and empty the old container 373 ThreadContainer newc = new ThreadContainer(); 374 newc.threadable = old.threadable; 375 newc.child = old.child; 376 377 for (ThreadContainer tail = newc.child; 378 tail != null; 379 tail = tail.next) 380 tail.parent = newc; 381 382 old.threadable = null; 383 old.child = null; 384 385 c.parent = old; 386 newc.parent = old; 387 388 // Old is now a dummy- give it 2 kids , c and newc 389 old.child = c; 390 c.next = newc; 391 } 392 // We've done a merge, so keep the same prev 393 c = prev; 394 } 395 396 subjectTable.clear(); 397 subjectTable = null; 398 399 } 400 } 401 402 /** 403 * A placeholder utility class, used for constructing a tree of Threadables 404 * Originall implementation by Jamie Zawinski. 405 * See the Grendel source for more details <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java#511">here</a> 406 * Threadable objects 407 * @author Rory Winston <rwinston@checkfree.com> 408 */ 409 class ThreadContainer { 410 Threadable threadable; 411 ThreadContainer parent; 412 ThreadContainer prev; 413 ThreadContainer next; 414 ThreadContainer child; 415 416 /** 417 * 418 * @param container 419 * @return true if child is under self's tree. Detects circular references 420 */ 421 boolean findChild(ThreadContainer target) { 422 if (child == null) 423 return false; 424 425 else if (child == target) 426 return true; 427 else 428 return child.findChild(target); 429 } 430 431 // Copy the ThreadContainer tree structure down into the underlying Threadable objects 432 // (Make the Threadable tree look like the ThreadContainer tree) 433 void flush() { 434 if (parent != null && threadable == null) 435 throw new RuntimeException("no threadable in " + this.toString()); 436 437 parent = null; 438 439 if (threadable != null) 440 threadable.setChild(child == null ? null : child.threadable); 441 442 if (child != null) { 443 child.flush(); 444 child = null; 445 } 446 447 if (threadable != null) 448 threadable.setNext(next == null ? null : next.threadable); 449 450 if (next != null) { 451 next.flush(); 452 next = null; 453 } 454 455 threadable = null; 456 } 457 458 /** 459 * Reverse the entire set of children 460 * 461 */ 462 void reverseChildren() { 463 if (child != null) { 464 ThreadContainer kid, prev, rest; 465 for (prev = null, kid = child, rest = kid.next; 466 kid != null; 467 prev = kid, 468 kid = rest, 469 rest = (rest == null ? null : rest.next)) 470 kid.next = prev; 471 472 child = prev; 473 474 // Do it for the kids 475 for (kid = child; kid != null; kid = kid.next) 476 kid.reverseChildren(); 477 } 478 } 479 }