001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * https://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.net.nntp; 019 020import java.util.Arrays; 021import java.util.HashMap; 022import java.util.List; 023import java.util.Map; 024 025/** 026 * This is an implementation of a message threading algorithm, as originally devised by Zamie Zawinski. 027 * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details. 028 * For his Java implementation, see 029 * <a href="https://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java"> 030 * https://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a> 031 */ 032public class Threader { 033 034 /** 035 * Constructs a new instance. 036 */ 037 public Threader() { 038 // empty 039 } 040 041 /** 042 * 043 * @param threadable 044 * @param idTable 045 */ 046 private void buildContainer(final Threadable threadable, final HashMap<String, NntpThreadContainer> idTable) { 047 String id = threadable.messageThreadId(); 048 NntpThreadContainer container = idTable.get(id); 049 int bogusIdCount = 0; 050 051 // A NntpThreadContainer exists for this id already. This should be a forward reference, but may 052 // be a duplicate id, in which case we will need to generate a bogus placeholder id 053 if (container != null) { 054 if (container.threadable != null) { // oops! duplicate ids... 055 bogusIdCount++; // Avoid dead local store warning 056 id = "<Bogus-id:" + bogusIdCount + ">"; 057 container = null; 058 } else { 059 // The container just contained a forward reference to this message, so let's 060 // fill in the threadable field of the container with this message 061 container.threadable = threadable; 062 } 063 } 064 065 // No container exists for that message Id. Create one and insert it into the hash table. 066 if (container == null) { 067 container = new NntpThreadContainer(); 068 container.threadable = threadable; 069 idTable.put(id, container); 070 } 071 072 // Iterate through all the references and create ThreadContainers for any references that 073 // don't have them. 074 NntpThreadContainer parentRef = null; 075 { 076 final String[] references = threadable.messageThreadReferences(); 077 for (final String refString : references) { 078 NntpThreadContainer ref = idTable.get(refString); 079 080 // if this id doesn't have a container, create one 081 if (ref == null) { 082 ref = new NntpThreadContainer(); 083 idTable.put(refString, ref); 084 } 085 086 // Link references together in the order they appear in the References: header, 087 // IF they don't have a parent already && 088 // IF it will not cause a circular reference 089 if (parentRef != null && ref.parent == null && parentRef != ref && !ref.findChild(parentRef)) { 090 // Link ref into the parent's child list 091 ref.parent = parentRef; 092 ref.next = parentRef.child; 093 parentRef.child = ref; 094 } 095 parentRef = ref; 096 } 097 } 098 099 // parentRef is now set to the container of the last element in the references field. make that 100 // be the parent of this container, unless doing so causes a circular reference 101 if (parentRef != null && (parentRef == container || container.findChild(parentRef))) { 102 parentRef = null; 103 } 104 105 // if it has a parent already, it's because we saw this message in a References: field, and presumed 106 // a parent based on the other entries in that field. Now that we have the actual message, we can 107 // throw away the old parent and use this new one 108 if (container.parent != null) { 109 NntpThreadContainer rest; 110 NntpThreadContainer prev; 111 112 for (prev = null, rest = container.parent.child; rest != null; prev = rest, rest = rest.next) { 113 if (rest == container) { 114 break; 115 } 116 } 117 118 if (rest == null) { 119 throw new IllegalStateException("Didnt find " + container + " in parent " + container.parent); 120 } 121 122 // Unlink this container from the parent's child list 123 if (prev == null) { 124 container.parent.child = container.next; 125 } else { 126 prev.next = container.next; 127 } 128 129 container.next = null; 130 container.parent = null; 131 } 132 133 // If we have a parent, link container into the parents child list 134 if (parentRef != null) { 135 container.parent = parentRef; 136 container.next = parentRef.child; 137 parentRef.child = container; 138 } 139 } 140 141 /** 142 * Find the root set of all existing ThreadContainers 143 * 144 * @param idTable 145 * @return root the NntpThreadContainer representing the root node 146 */ 147 private NntpThreadContainer findRootSet(final HashMap<String, NntpThreadContainer> idTable) { 148 final NntpThreadContainer root = new NntpThreadContainer(); 149 for (final Map.Entry<String, NntpThreadContainer> entry : idTable.entrySet()) { 150 final NntpThreadContainer c = entry.getValue(); 151 if (c.parent == null) { 152 if (c.next != null) { 153 throw new IllegalStateException("c.next is " + c.next.toString()); 154 } 155 c.next = root.child; 156 root.child = c; 157 } 158 } 159 return root; 160 } 161 162 /** 163 * If any two members of the root set have the same subject, merge them. This is to attempt to accommodate messages without References: headers. 164 * 165 * @param root 166 */ 167 private void gatherSubjects(final NntpThreadContainer root) { 168 169 int count = 0; 170 171 for (NntpThreadContainer c = root.child; c != null; c = c.next) { 172 count++; 173 } 174 175 // TODO verify this will avoid rehashing 176 HashMap<String, NntpThreadContainer> subjectTable = new HashMap<>((int) (count * 1.2), (float) 0.9); 177 count = 0; 178 179 for (NntpThreadContainer c = root.child; c != null; c = c.next) { 180 Threadable threadable = c.threadable; 181 182 // No threadable? If so, it is a dummy node in the root set. 183 // Only root set members may be dummies, and they always have at least 2 kids 184 // Take the first kid as representative of the subject 185 if (threadable == null) { 186 threadable = c.child.threadable; 187 } 188 189 final String subj = threadable.simplifiedSubject(); 190 191 if (subj == null || subj.isEmpty()) { 192 continue; 193 } 194 195 final NntpThreadContainer old = subjectTable.get(subj); 196 197 // Add this container to the table iff: 198 // - There exists no container with this subject 199 // - or this is a dummy container and the old one is not - the dummy one is 200 // more interesting as a root, so put it in the table instead 201 // - The container in the table has a "Re:" version of this subject, and 202 // this container has a non-"Re:" version of this subject. The non-"Re:" version 203 // is the more interesting of the two. 204 if (old == null || c.threadable == null && old.threadable != null 205 || old.threadable != null && old.threadable.subjectIsReply() && c.threadable != null && !c.threadable.subjectIsReply()) { 206 subjectTable.put(subj, c); 207 count++; 208 } 209 } 210 211 // If the table is empty, we're done 212 if (count == 0) { 213 return; 214 } 215 216 // subjectTable is now populated with one entry for each subject which occurs in the 217 // root set. Iterate over the root set, and gather together the difference. 218 NntpThreadContainer prev; 219 NntpThreadContainer c; 220 NntpThreadContainer rest; 221 for (prev = null, c = root.child, rest = c.next; c != null; prev = c, c = rest, rest = rest == null ? null : rest.next) { 222 Threadable threadable = c.threadable; 223 224 // is it a dummy node? 225 if (threadable == null) { 226 threadable = c.child.threadable; 227 } 228 229 final String subj = threadable.simplifiedSubject(); 230 231 // Don't thread together all subjectless messages 232 if (subj == null || subj.isEmpty()) { 233 continue; 234 } 235 236 final NntpThreadContainer old = subjectTable.get(subj); 237 238 if (old == c) { // That's us 239 continue; 240 } 241 242 // We have now found another container in the root set with the same subject 243 // Remove the "second" message from the root set 244 if (prev == null) { 245 root.child = c.next; 246 } else { 247 prev.next = c.next; 248 } 249 c.next = null; 250 251 if (old.threadable == null && c.threadable == null) { 252 // both dummies - merge them 253 NntpThreadContainer tail; 254 for (tail = old.child; tail != null && tail.next != null; tail = tail.next) { 255 // do nothing 256 } 257 258 if (tail != null) { // protect against possible NPE 259 tail.next = c.child; 260 } 261 262 for (tail = c.child; tail != null; tail = tail.next) { 263 tail.parent = old; 264 } 265 266 c.child = null; 267 } else if (old.threadable == null || c.threadable != null && c.threadable.subjectIsReply() && !old.threadable.subjectIsReply()) { 268 // Else if old is empty, or c has "Re:" and old does not ==> make this message a child of old 269 c.parent = old; 270 c.next = old.child; 271 old.child = c; 272 } else { 273 // else make the old and new messages be children of a new dummy container. 274 // We create a new container object for old.msg and empty the old container 275 final NntpThreadContainer newc = new NntpThreadContainer(); 276 newc.threadable = old.threadable; 277 newc.child = old.child; 278 279 for (NntpThreadContainer tail = newc.child; tail != null; tail = tail.next) { 280 tail.parent = newc; 281 } 282 283 old.threadable = null; 284 old.child = null; 285 286 c.parent = old; 287 newc.parent = old; 288 289 // Old is now a dummy- give it 2 kids , c and newc 290 old.child = c; 291 c.next = newc; 292 } 293 // We've done a merge, so keep the same prev 294 c = prev; 295 } 296 297 subjectTable.clear(); 298 subjectTable = null; 299 300 } 301 302 /** 303 * Delete any empty or dummy ThreadContainers 304 * 305 * @param parent 306 */ 307 private void pruneEmptyContainers(final NntpThreadContainer parent) { 308 NntpThreadContainer container; 309 NntpThreadContainer prev; 310 NntpThreadContainer next; 311 for (prev = null, container = parent.child, next = container.next; container != null; prev = container, container = next, next = container == null 312 ? null 313 : container.next) { 314 315 // Is it empty and without any children? If so,delete it 316 if (container.threadable == null && container.child == null) { 317 if (prev == null) { 318 parent.child = container.next; 319 } else { 320 prev.next = container.next; 321 } 322 // Set container to prev so that prev keeps its same value the next time through the loop 323 container = prev; 324 // Else if empty, with kids, and (not at root or only one kid) 325 } else if (container.threadable == null && (container.parent != null || container.child.next == null)) { 326 // We have an invalid/expired message with kids. Promote the kids to this level. 327 NntpThreadContainer tail; 328 final NntpThreadContainer kids = container.child; 329 330 // Remove this container and replace with 'kids'. 331 if (prev == null) { 332 parent.child = kids; 333 } else { 334 prev.next = kids; 335 } 336 337 // Make each child's parent be this level's parent -> i.e. promote the children. 338 // Make the last child's next point to this container's next 339 // i.e. splice kids into the list in place of container 340 for (tail = kids; tail.next != null; tail = tail.next) { 341 tail.parent = container.parent; 342 } 343 344 tail.parent = container.parent; 345 tail.next = container.next; 346 347 // next currently points to the item after the inserted items in the chain - reset that, so we process the newly 348 // promoted items next time round 349 next = kids; 350 351 // Set container to prev so that prev keeps its same value the next time through the loop 352 container = prev; 353 } else if (container.child != null) { 354 // A real message , with kids 355 // Iterate over the children 356 pruneEmptyContainers(container); 357 } 358 } 359 } 360 361 /** 362 * The client passes in a list of Iterable objects, and the Threader constructs a connected 'graph' of messages 363 * 364 * @param messages iterable of messages to thread, must not be empty 365 * @return null if messages == null or root.child == null or messages list is empty 366 * @since 3.0 367 */ 368 public Threadable thread(final Iterable<? extends Threadable> messages) { 369 if (messages == null) { 370 return null; 371 } 372 373 HashMap<String, NntpThreadContainer> idTable = new HashMap<>(); 374 375 // walk through each Threadable element 376 for (final Threadable t : messages) { 377 if (!t.isDummy()) { 378 buildContainer(t, idTable); 379 } 380 } 381 382 if (idTable.isEmpty()) { 383 return null; 384 } 385 386 final NntpThreadContainer root = findRootSet(idTable); 387 idTable.clear(); 388 idTable = null; 389 390 pruneEmptyContainers(root); 391 392 root.reverseChildren(); 393 gatherSubjects(root); 394 395 if (root.next != null) { 396 throw new IllegalStateException("root node has a next:" + root); 397 } 398 399 for (NntpThreadContainer r = root.child; r != null; r = r.next) { 400 if (r.threadable == null) { 401 r.threadable = r.child.threadable.makeDummy(); 402 } 403 } 404 405 final Threadable result = root.child == null ? null : root.child.threadable; 406 root.flush(); 407 408 return result; 409 } 410 411 /** 412 * The client passes in a list of Threadable objects, and the Threader constructs a connected 'graph' of messages 413 * 414 * @param messages list of messages to thread, must not be empty 415 * @return null if messages == null or root.child == null or messages list is empty 416 * @since 2.2 417 */ 418 public Threadable thread(final List<? extends Threadable> messages) { 419 return thread((Iterable<? extends Threadable>) messages); 420 } 421 422 // DEPRECATED METHODS - for API compatibility only - DO NOT USE 423 424 /** 425 * The client passes in an array of Threadable objects, and the Threader constructs a connected 'graph' of messages 426 * 427 * @param messages array of messages to thread, must not be empty 428 * @return null if messages == null or root.child == null or messages array is empty 429 * @deprecated (2.2) prefer {@link #thread(List)} 430 */ 431 @Deprecated 432 public Threadable thread(final Threadable[] messages) { 433 if (messages == null) { 434 return null; 435 } 436 return thread(Arrays.asList(messages)); 437 } 438 439}