1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.net.nntp;
19
20 import java.util.Arrays;
21 import java.util.HashMap;
22 import java.util.List;
23 import java.util.Map;
24
25 /**
26 * This is an implementation of a message threading algorithm, as originally devised by Zamie Zawinski.
27 * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details.
28 * For his Java implementation, see
29 * <a href="https://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java">
30 * https://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a>
31 */
32 public class Threader {
33
34 /**
35 * Constructs a new instance.
36 */
37 public Threader() {
38 // empty
39 }
40
41 /**
42 *
43 * @param threadable
44 * @param idTable
45 */
46 private void buildContainer(final Threadable threadable, final HashMap<String, NntpThreadContainer> idTable) {
47 String id = threadable.messageThreadId();
48 NntpThreadContainer container = idTable.get(id);
49 int bogusIdCount = 0;
50
51 // A NntpThreadContainer exists for this id already. This should be a forward reference, but may
52 // be a duplicate id, in which case we will need to generate a bogus placeholder id
53 if (container != null) {
54 if (container.threadable != null) { // oops! duplicate ids...
55 bogusIdCount++; // Avoid dead local store warning
56 id = "<Bogus-id:" + bogusIdCount + ">";
57 container = null;
58 } else {
59 // The container just contained a forward reference to this message, so let's
60 // fill in the threadable field of the container with this message
61 container.threadable = threadable;
62 }
63 }
64
65 // No container exists for that message Id. Create one and insert it into the hash table.
66 if (container == null) {
67 container = new NntpThreadContainer();
68 container.threadable = threadable;
69 idTable.put(id, container);
70 }
71
72 // Iterate through all the references and create ThreadContainers for any references that
73 // don't have them.
74 NntpThreadContainer parentRef = null;
75 {
76 final String[] references = threadable.messageThreadReferences();
77 for (final String refString : references) {
78 NntpThreadContainer ref = idTable.get(refString);
79
80 // if this id doesn't have a container, create one
81 if (ref == null) {
82 ref = new NntpThreadContainer();
83 idTable.put(refString, ref);
84 }
85
86 // Link references together in the order they appear in the References: header,
87 // IF they don't have a parent already &&
88 // IF it will not cause a circular reference
89 if (parentRef != null && ref.parent == null && parentRef != ref && !ref.findChild(parentRef)) {
90 // Link ref into the parent's child list
91 ref.parent = parentRef;
92 ref.next = parentRef.child;
93 parentRef.child = ref;
94 }
95 parentRef = ref;
96 }
97 }
98
99 // parentRef is now set to the container of the last element in the references field. make that
100 // be the parent of this container, unless doing so causes a circular reference
101 if (parentRef != null && (parentRef == container || container.findChild(parentRef))) {
102 parentRef = null;
103 }
104
105 // if it has a parent already, it's because we saw this message in a References: field, and presumed
106 // a parent based on the other entries in that field. Now that we have the actual message, we can
107 // throw away the old parent and use this new one
108 if (container.parent != null) {
109 NntpThreadContainer rest;
110 NntpThreadContainer prev;
111
112 for (prev = null, rest = container.parent.child; rest != null; prev = rest, rest = rest.next) {
113 if (rest == container) {
114 break;
115 }
116 }
117
118 if (rest == null) {
119 throw new IllegalStateException("Didnt find " + container + " in parent " + container.parent);
120 }
121
122 // Unlink this container from the parent's child list
123 if (prev == null) {
124 container.parent.child = container.next;
125 } else {
126 prev.next = container.next;
127 }
128
129 container.next = null;
130 container.parent = null;
131 }
132
133 // If we have a parent, link container into the parents child list
134 if (parentRef != null) {
135 container.parent = parentRef;
136 container.next = parentRef.child;
137 parentRef.child = container;
138 }
139 }
140
141 /**
142 * Find the root set of all existing ThreadContainers
143 *
144 * @param idTable
145 * @return root the NntpThreadContainer representing the root node
146 */
147 private NntpThreadContainer findRootSet(final HashMap<String, NntpThreadContainer> idTable) {
148 final NntpThreadContainer root = new NntpThreadContainer();
149 for (final Map.Entry<String, NntpThreadContainer> entry : idTable.entrySet()) {
150 final NntpThreadContainer c = entry.getValue();
151 if (c.parent == null) {
152 if (c.next != null) {
153 throw new IllegalStateException("c.next is " + c.next.toString());
154 }
155 c.next = root.child;
156 root.child = c;
157 }
158 }
159 return root;
160 }
161
162 /**
163 * If any two members of the root set have the same subject, merge them. This is to attempt to accommodate messages without References: headers.
164 *
165 * @param root
166 */
167 private void gatherSubjects(final NntpThreadContainer root) {
168
169 int count = 0;
170
171 for (NntpThreadContainer c = root.child; c != null; c = c.next) {
172 count++;
173 }
174
175 // TODO verify this will avoid rehashing
176 HashMap<String, NntpThreadContainer> subjectTable = new HashMap<>((int) (count * 1.2), (float) 0.9);
177 count = 0;
178
179 for (NntpThreadContainer c = root.child; c != null; c = c.next) {
180 Threadable threadable = c.threadable;
181
182 // No threadable? If so, it is a dummy node in the root set.
183 // Only root set members may be dummies, and they always have at least 2 kids
184 // Take the first kid as representative of the subject
185 if (threadable == null) {
186 threadable = c.child.threadable;
187 }
188
189 final String subj = threadable.simplifiedSubject();
190
191 if (subj == null || subj.isEmpty()) {
192 continue;
193 }
194
195 final NntpThreadContainer old = subjectTable.get(subj);
196
197 // Add this container to the table iff:
198 // - There exists no container with this subject
199 // - or this is a dummy container and the old one is not - the dummy one is
200 // more interesting as a root, so put it in the table instead
201 // - The container in the table has a "Re:" version of this subject, and
202 // this container has a non-"Re:" version of this subject. The non-"Re:" version
203 // is the more interesting of the two.
204 if (old == null || c.threadable == null && old.threadable != null
205 || old.threadable != null && old.threadable.subjectIsReply() && c.threadable != null && !c.threadable.subjectIsReply()) {
206 subjectTable.put(subj, c);
207 count++;
208 }
209 }
210
211 // If the table is empty, we're done
212 if (count == 0) {
213 return;
214 }
215
216 // subjectTable is now populated with one entry for each subject which occurs in the
217 // root set. Iterate over the root set, and gather together the difference.
218 NntpThreadContainer prev;
219 NntpThreadContainer c;
220 NntpThreadContainer rest;
221 for (prev = null, c = root.child, rest = c.next; c != null; prev = c, c = rest, rest = rest == null ? null : rest.next) {
222 Threadable threadable = c.threadable;
223
224 // is it a dummy node?
225 if (threadable == null) {
226 threadable = c.child.threadable;
227 }
228
229 final String subj = threadable.simplifiedSubject();
230
231 // Don't thread together all subjectless messages
232 if (subj == null || subj.isEmpty()) {
233 continue;
234 }
235
236 final NntpThreadContainer old = subjectTable.get(subj);
237
238 if (old == c) { // That's us
239 continue;
240 }
241
242 // We have now found another container in the root set with the same subject
243 // Remove the "second" message from the root set
244 if (prev == null) {
245 root.child = c.next;
246 } else {
247 prev.next = c.next;
248 }
249 c.next = null;
250
251 if (old.threadable == null && c.threadable == null) {
252 // both dummies - merge them
253 NntpThreadContainer tail;
254 for (tail = old.child; tail != null && tail.next != null; tail = tail.next) {
255 // do nothing
256 }
257
258 if (tail != null) { // protect against possible NPE
259 tail.next = c.child;
260 }
261
262 for (tail = c.child; tail != null; tail = tail.next) {
263 tail.parent = old;
264 }
265
266 c.child = null;
267 } else if (old.threadable == null || c.threadable != null && c.threadable.subjectIsReply() && !old.threadable.subjectIsReply()) {
268 // Else if old is empty, or c has "Re:" and old does not ==> make this message a child of old
269 c.parent = old;
270 c.next = old.child;
271 old.child = c;
272 } else {
273 // else make the old and new messages be children of a new dummy container.
274 // We create a new container object for old.msg and empty the old container
275 final NntpThreadContainer newc = new NntpThreadContainer();
276 newc.threadable = old.threadable;
277 newc.child = old.child;
278
279 for (NntpThreadContainer tail = newc.child; tail != null; tail = tail.next) {
280 tail.parent = newc;
281 }
282
283 old.threadable = null;
284 old.child = null;
285
286 c.parent = old;
287 newc.parent = old;
288
289 // Old is now a dummy- give it 2 kids , c and newc
290 old.child = c;
291 c.next = newc;
292 }
293 // We've done a merge, so keep the same prev
294 c = prev;
295 }
296
297 subjectTable.clear();
298 subjectTable = null;
299
300 }
301
302 /**
303 * Delete any empty or dummy ThreadContainers
304 *
305 * @param parent
306 */
307 private void pruneEmptyContainers(final NntpThreadContainer parent) {
308 NntpThreadContainer container;
309 NntpThreadContainer prev;
310 NntpThreadContainer next;
311 for (prev = null, container = parent.child, next = container.next; container != null; prev = container, container = next, next = container == null
312 ? null
313 : container.next) {
314
315 // Is it empty and without any children? If so,delete it
316 if (container.threadable == null && container.child == null) {
317 if (prev == null) {
318 parent.child = container.next;
319 } else {
320 prev.next = container.next;
321 }
322 // Set container to prev so that prev keeps its same value the next time through the loop
323 container = prev;
324 // Else if empty, with kids, and (not at root or only one kid)
325 } else if (container.threadable == null && (container.parent != null || container.child.next == null)) {
326 // We have an invalid/expired message with kids. Promote the kids to this level.
327 NntpThreadContainer tail;
328 final NntpThreadContainer kids = container.child;
329
330 // Remove this container and replace with 'kids'.
331 if (prev == null) {
332 parent.child = kids;
333 } else {
334 prev.next = kids;
335 }
336
337 // Make each child's parent be this level's parent -> i.e. promote the children.
338 // Make the last child's next point to this container's next
339 // i.e. splice kids into the list in place of container
340 for (tail = kids; tail.next != null; tail = tail.next) {
341 tail.parent = container.parent;
342 }
343
344 tail.parent = container.parent;
345 tail.next = container.next;
346
347 // next currently points to the item after the inserted items in the chain - reset that, so we process the newly
348 // promoted items next time round
349 next = kids;
350
351 // Set container to prev so that prev keeps its same value the next time through the loop
352 container = prev;
353 } else if (container.child != null) {
354 // A real message , with kids
355 // Iterate over the children
356 pruneEmptyContainers(container);
357 }
358 }
359 }
360
361 /**
362 * The client passes in a list of Iterable objects, and the Threader constructs a connected 'graph' of messages
363 *
364 * @param messages iterable of messages to thread, must not be empty
365 * @return null if messages == null or root.child == null or messages list is empty
366 * @since 3.0
367 */
368 public Threadable thread(final Iterable<? extends Threadable> messages) {
369 if (messages == null) {
370 return null;
371 }
372
373 HashMap<String, NntpThreadContainer> idTable = new HashMap<>();
374
375 // walk through each Threadable element
376 for (final Threadable t : messages) {
377 if (!t.isDummy()) {
378 buildContainer(t, idTable);
379 }
380 }
381
382 if (idTable.isEmpty()) {
383 return null;
384 }
385
386 final NntpThreadContainer root = findRootSet(idTable);
387 idTable.clear();
388 idTable = null;
389
390 pruneEmptyContainers(root);
391
392 root.reverseChildren();
393 gatherSubjects(root);
394
395 if (root.next != null) {
396 throw new IllegalStateException("root node has a next:" + root);
397 }
398
399 for (NntpThreadContainer r = root.child; r != null; r = r.next) {
400 if (r.threadable == null) {
401 r.threadable = r.child.threadable.makeDummy();
402 }
403 }
404
405 final Threadable result = root.child == null ? null : root.child.threadable;
406 root.flush();
407
408 return result;
409 }
410
411 /**
412 * The client passes in a list of Threadable objects, and the Threader constructs a connected 'graph' of messages
413 *
414 * @param messages list of messages to thread, must not be empty
415 * @return null if messages == null or root.child == null or messages list is empty
416 * @since 2.2
417 */
418 public Threadable thread(final List<? extends Threadable> messages) {
419 return thread((Iterable<? extends Threadable>) messages);
420 }
421
422 // DEPRECATED METHODS - for API compatibility only - DO NOT USE
423
424 /**
425 * The client passes in an array of Threadable objects, and the Threader constructs a connected 'graph' of messages
426 *
427 * @param messages array of messages to thread, must not be empty
428 * @return null if messages == null or root.child == null or messages array is empty
429 * @deprecated (2.2) prefer {@link #thread(List)}
430 */
431 @Deprecated
432 public Threadable thread(final Threadable[] messages) {
433 if (messages == null) {
434 return null;
435 }
436 return thread(Arrays.asList(messages));
437 }
438
439 }