001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018 019package org.apache.commons.net.nntp; 020 021/** 022 * This is an implementation of a message threading algorithm, as originally devised by Zamie Zawinski. 023 * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details. 024 * For his Java implementation, see 025 * <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java"> 026 * http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a> 027 */ 028 029import java.util.Arrays; 030import java.util.HashMap; 031import java.util.Iterator; 032import java.util.List; 033import java.util.Map; 034 035public class Threader { 036 037 /** 038 * The client passes in a list of Threadable objects, and 039 * the Threader constructs a connected 'graph' of messages 040 * @param messages list of messages to thread, must not be empty 041 * @return null if messages == null or root.child == null or messages list is empty 042 * @since 2.2 043 */ 044 public Threadable thread(List<? extends Threadable> messages) { 045 return thread((Iterable<? extends Threadable>)messages); 046 } 047 048 /** 049 * The client passes in a list of Iterable objects, and 050 * the Threader constructs a connected 'graph' of messages 051 * @param messages iterable of messages to thread, must not be empty 052 * @return null if messages == null or root.child == null or messages list is empty 053 * @since 3.0 054 */ 055 public Threadable thread(Iterable<? extends Threadable> messages) { 056 if (messages == null) { 057 return null; 058 } 059 060 HashMap<String,ThreadContainer> idTable = new HashMap<String,ThreadContainer>(); 061 062 // walk through each Threadable element 063 for (Threadable t : messages) { 064 if (!t.isDummy()) { 065 buildContainer(t, idTable); 066 } 067 } 068 069 if (idTable.isEmpty()) { 070 return null; 071 } 072 073 ThreadContainer root = findRootSet(idTable); 074 idTable.clear(); 075 idTable = null; 076 077 pruneEmptyContainers(root); 078 079 root.reverseChildren(); 080 gatherSubjects(root); 081 082 if (root.next != null) { 083 throw new RuntimeException("root node has a next:" + root); 084 } 085 086 for (ThreadContainer r = root.child; r != null; r = r.next) { 087 if (r.threadable == null) { 088 r.threadable = r.child.threadable.makeDummy(); 089 } 090 } 091 092 Threadable result = (root.child == null ? null : root.child.threadable); 093 root.flush(); 094 095 return result; 096 } 097 098 /** 099 * 100 * @param threadable 101 * @param idTable 102 */ 103 private void buildContainer(Threadable threadable, HashMap<String,ThreadContainer> idTable) { 104 String id = threadable.messageThreadId(); 105 ThreadContainer container = idTable.get(id); 106 int bogusIdCount = 0; 107 108 // A ThreadContainer exists for this id already. This should be a forward reference, but may 109 // be a duplicate id, in which case we will need to generate a bogus placeholder id 110 if (container != null) { 111 if (container.threadable != null) { // oops! duplicate ids... 112 bogusIdCount++ ; // Avoid dead local store warning 113 id = "<Bogus-id:" + (bogusIdCount) + ">"; 114 container = null; 115 } else { 116 // The container just contained a forward reference to this message, so let's 117 // fill in the threadable field of the container with this message 118 container.threadable = threadable; 119 } 120 } 121 122 // No container exists for that message Id. Create one and insert it into the hash table. 123 if (container == null) { 124 container = new ThreadContainer(); 125 container.threadable = threadable; 126 idTable.put(id, container); 127 } 128 129 // Iterate through all of the references and create ThreadContainers for any references that 130 // don't have them. 131 ThreadContainer parentRef = null; 132 { 133 String[] references = threadable.messageThreadReferences(); 134 for (String refString : references) 135 { 136 ThreadContainer ref = idTable.get(refString); 137 138 // if this id doesnt have a container, create one 139 if (ref == null) { 140 ref = new ThreadContainer(); 141 idTable.put(refString, ref); 142 } 143 144 // Link references together in the order they appear in the References: header, 145 // IF they dont have a have a parent already && 146 // IF it will not cause a circular reference 147 if ((parentRef != null) 148 && (ref.parent == null) 149 && (parentRef != ref) 150 && !(ref.findChild(parentRef))) { 151 // Link ref into the parent's child list 152 ref.parent = parentRef; 153 ref.next = parentRef.child; 154 parentRef.child = ref; 155 } 156 parentRef = ref; 157 } 158 } 159 160 // parentRef is now set to the container of the last element in the references field. make that 161 // be the parent of this container, unless doing so causes a circular reference 162 if (parentRef != null 163 && (parentRef == container || container.findChild(parentRef))) 164 { 165 parentRef = null; 166 } 167 168 // if it has a parent already, its because we saw this message in a References: field, and presumed 169 // a parent based on the other entries in that field. Now that we have the actual message, we can 170 // throw away the old parent and use this new one 171 if (container.parent != null) { 172 ThreadContainer rest, prev; 173 174 for (prev = null, rest = container.parent.child; 175 rest != null; 176 prev = rest, rest = rest.next) { 177 if (rest == container) { 178 break; 179 } 180 } 181 182 if (rest == null) { 183 throw new RuntimeException( 184 "Didnt find " 185 + container 186 + " in parent" 187 + container.parent); 188 } 189 190 // Unlink this container from the parent's child list 191 if (prev == null) { 192 container.parent.child = container.next; 193 } else { 194 prev.next = container.next; 195 } 196 197 container.next = null; 198 container.parent = null; 199 } 200 201 // If we have a parent, link container into the parents child list 202 if (parentRef != null) { 203 container.parent = parentRef; 204 container.next = parentRef.child; 205 parentRef.child = container; 206 } 207 } 208 209 /** 210 * Find the root set of all existing ThreadContainers 211 * @param idTable 212 * @return root the ThreadContainer representing the root node 213 */ 214 private ThreadContainer findRootSet(HashMap<String, ThreadContainer> idTable) { 215 ThreadContainer root = new ThreadContainer(); 216 Iterator<Map.Entry<String, ThreadContainer>> iter = idTable.entrySet().iterator(); 217 218 while (iter.hasNext()) { 219 Map.Entry<String, ThreadContainer> entry = iter.next(); 220 ThreadContainer c = entry.getValue(); 221 if (c.parent == null) { 222 if (c.next != null) { 223 throw new RuntimeException( 224 "c.next is " + c.next.toString()); 225 } 226 c.next = root.child; 227 root.child = c; 228 } 229 } 230 return root; 231 } 232 233 /** 234 * Delete any empty or dummy ThreadContainers 235 * @param parent 236 */ 237 private void pruneEmptyContainers(ThreadContainer parent) { 238 ThreadContainer container, prev, next; 239 for (prev = null, container = parent.child, next = container.next; 240 container != null; 241 prev = container, 242 container = next, 243 next = (container == null ? null : container.next)) { 244 245 // Is it empty and without any children? If so,delete it 246 if (container.threadable == null && container.child == null) { 247 if (prev == null) { 248 parent.child = container.next; 249 } else { 250 prev.next = container.next; 251 } 252 253 // Set container to prev so that prev keeps its same value the next time through the loop 254 container = prev; 255 } 256 257 // Else if empty, with kids, and (not at root or only one kid) 258 else if ( 259 container.threadable == null 260 && container.child != null 261 && (container.parent != null 262 || container.child.next == null)) { 263 // We have an invalid/expired message with kids. Promote the kids to this level. 264 ThreadContainer tail; 265 ThreadContainer kids = container.child; 266 267 // Remove this container and replace with 'kids'. 268 if (prev == null) { 269 parent.child = kids; 270 } else { 271 prev.next = kids; 272 } 273 274 // Make each child's parent be this level's parent -> i.e. promote the children. 275 // Make the last child's next point to this container's next 276 // i.e. splice kids into the list in place of container 277 for (tail = kids; tail.next != null; tail = tail.next) { 278 tail.parent = container.parent; 279 } 280 281 tail.parent = container.parent; 282 tail.next = container.next; 283 284 // next currently points to the item after the inserted items in the chain - reset that so we process the newly 285 // promoted items next time round 286 next = kids; 287 288 // Set container to prev so that prev keeps its same value the next time through the loop 289 container = prev; 290 } else if (container.child != null) { 291 // A real message , with kids 292 // Iterate over the children 293 pruneEmptyContainers(container); 294 } 295 } 296 } 297 298 /** 299 * If any two members of the root set have the same subject, merge them. 300 * This is to attempt to accomodate messages without References: headers. 301 * @param root 302 */ 303 private void gatherSubjects(ThreadContainer root) { 304 305 int count = 0; 306 307 for (ThreadContainer c = root.child; c != null; c = c.next) { 308 count++; 309 } 310 311 // TODO verify this will avoid rehashing 312 HashMap<String, ThreadContainer> subjectTable = new HashMap<String, ThreadContainer>((int) (count * 1.2), (float) 0.9); 313 count = 0; 314 315 for (ThreadContainer c = root.child; c != null; c = c.next) { 316 Threadable threadable = c.threadable; 317 318 // No threadable? If so, it is a dummy node in the root set. 319 // Only root set members may be dummies, and they alway have at least 2 kids 320 // Take the first kid as representative of the subject 321 if (threadable == null) { 322 threadable = c.child.threadable; 323 } 324 325 String subj = threadable.simplifiedSubject(); 326 327 if (subj == null || subj.length() == 0) { 328 continue; 329 } 330 331 ThreadContainer old = subjectTable.get(subj); 332 333 // Add this container to the table iff: 334 // - There exists no container with this subject 335 // - or this is a dummy container and the old one is not - the dummy one is 336 // more interesting as a root, so put it in the table instead 337 // - The container in the table has a "Re:" version of this subject, and 338 // this container has a non-"Re:" version of this subject. The non-"Re:" version 339 // is the more interesting of the two. 340 if (old == null 341 || (c.threadable == null && old.threadable != null) 342 || (old.threadable != null 343 && old.threadable.subjectIsReply() 344 && c.threadable != null 345 && !c.threadable.subjectIsReply())) { 346 subjectTable.put(subj, c); 347 count++; 348 } 349 } 350 351 // If the table is empty, we're done 352 if (count == 0) { 353 return; 354 } 355 356 // subjectTable is now populated with one entry for each subject which occurs in the 357 // root set. Iterate over the root set, and gather together the difference. 358 ThreadContainer prev, c, rest; 359 for (prev = null, c = root.child, rest = c.next; 360 c != null; 361 prev = c, c = rest, rest = (rest == null ? null : rest.next)) { 362 Threadable threadable = c.threadable; 363 364 // is it a dummy node? 365 if (threadable == null) { 366 threadable = c.child.threadable; 367 } 368 369 String subj = threadable.simplifiedSubject(); 370 371 // Dont thread together all subjectless messages 372 if (subj == null || subj.length() == 0) { 373 continue; 374 } 375 376 ThreadContainer old = subjectTable.get(subj); 377 378 if (old == c) { // That's us 379 continue; 380 } 381 382 // We have now found another container in the root set with the same subject 383 // Remove the "second" message from the root set 384 if (prev == null) { 385 root.child = c.next; 386 } else { 387 prev.next = c.next; 388 } 389 c.next = null; 390 391 if (old.threadable == null && c.threadable == null) { 392 // both dummies - merge them 393 ThreadContainer tail; 394 for (tail = old.child; 395 tail != null && tail.next != null; 396 tail = tail.next) { 397 // do nothing 398 } 399 400 if (tail != null) { // protect against possible NPE 401 tail.next = c.child; 402 } 403 404 for (tail = c.child; tail != null; tail = tail.next) { 405 tail.parent = old; 406 } 407 408 c.child = null; 409 } else if ( 410 old.threadable == null 411 || (c.threadable != null 412 && c.threadable.subjectIsReply() 413 && !old.threadable.subjectIsReply())) { 414 // Else if old is empty, or c has "Re:" and old does not ==> make this message a child of old 415 c.parent = old; 416 c.next = old.child; 417 old.child = c; 418 } else { 419 // else make the old and new messages be children of a new dummy container. 420 // We create a new container object for old.msg and empty the old container 421 ThreadContainer newc = new ThreadContainer(); 422 newc.threadable = old.threadable; 423 newc.child = old.child; 424 425 for (ThreadContainer tail = newc.child; 426 tail != null; 427 tail = tail.next) 428 { 429 tail.parent = newc; 430 } 431 432 old.threadable = null; 433 old.child = null; 434 435 c.parent = old; 436 newc.parent = old; 437 438 // Old is now a dummy- give it 2 kids , c and newc 439 old.child = c; 440 c.next = newc; 441 } 442 // We've done a merge, so keep the same prev 443 c = prev; 444 } 445 446 subjectTable.clear(); 447 subjectTable = null; 448 449 } 450 451 452 // DEPRECATED METHODS - for API compatibility only - DO NOT USE 453 454 /** 455 * The client passes in an array of Threadable objects, and 456 * the Threader constructs a connected 'graph' of messages 457 * @param messages array of messages to thread, must not be empty 458 * @return null if messages == null or root.child == null or messages array is empty 459 * @deprecated (2.2) prefer {@link #thread(List)} 460 */ 461 @Deprecated 462 public Threadable thread(Threadable[] messages) { 463 if (messages == null) { 464 return null; 465 } 466 return thread(Arrays.asList(messages)); 467 } 468 469}