View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.net.nntp;
19  
20  import java.util.Arrays;
21  import java.util.HashMap;
22  import java.util.List;
23  import java.util.Map;
24  
25  /**
26   * This is an implementation of a message threading algorithm, as originally devised by Zamie Zawinski.
27   * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details.
28   * For his Java implementation, see
29   * <a href="https://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java">
30   * https://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a>
31   */
32  public class Threader {
33  
34      /**
35       * Constructs a new instance.
36       */
37      public Threader() {
38          // empty
39      }
40  
41      /**
42       *
43       * @param threadable
44       * @param idTable
45       */
46      private void buildContainer(final Threadable threadable, final HashMap<String, NntpThreadContainer> idTable) {
47          String id = threadable.messageThreadId();
48          NntpThreadContainer container = idTable.get(id);
49          int bogusIdCount = 0;
50  
51          // A NntpThreadContainer exists for this id already. This should be a forward reference, but may
52          // be a duplicate id, in which case we will need to generate a bogus placeholder id
53          if (container != null) {
54              if (container.threadable != null) { // oops! duplicate ids...
55                  bogusIdCount++; // Avoid dead local store warning
56                  id = "<Bogus-id:" + bogusIdCount + ">";
57                  container = null;
58              } else {
59                  // The container just contained a forward reference to this message, so let's
60                  // fill in the threadable field of the container with this message
61                  container.threadable = threadable;
62              }
63          }
64  
65          // No container exists for that message Id. Create one and insert it into the hash table.
66          if (container == null) {
67              container = new NntpThreadContainer();
68              container.threadable = threadable;
69              idTable.put(id, container);
70          }
71  
72          // Iterate through all the references and create ThreadContainers for any references that
73          // don't have them.
74          NntpThreadContainer parentRef = null;
75          {
76              final String[] references = threadable.messageThreadReferences();
77              for (final String refString : references) {
78                  NntpThreadContainer ref = idTable.get(refString);
79  
80                  // if this id doesn't have a container, create one
81                  if (ref == null) {
82                      ref = new NntpThreadContainer();
83                      idTable.put(refString, ref);
84                  }
85  
86                  // Link references together in the order they appear in the References: header,
87                  // IF they don't have a parent already &&
88                  // IF it will not cause a circular reference
89                  if (parentRef != null && ref.parent == null && parentRef != ref && !ref.findChild(parentRef)) {
90                      // Link ref into the parent's child list
91                      ref.parent = parentRef;
92                      ref.next = parentRef.child;
93                      parentRef.child = ref;
94                  }
95                  parentRef = ref;
96              }
97          }
98  
99          // parentRef is now set to the container of the last element in the references field. make that
100         // be the parent of this container, unless doing so causes a circular reference
101         if (parentRef != null && (parentRef == container || container.findChild(parentRef))) {
102             parentRef = null;
103         }
104 
105         // if it has a parent already, it's because we saw this message in a References: field, and presumed
106         // a parent based on the other entries in that field. Now that we have the actual message, we can
107         // throw away the old parent and use this new one
108         if (container.parent != null) {
109             NntpThreadContainer rest;
110             NntpThreadContainer prev;
111 
112             for (prev = null, rest = container.parent.child; rest != null; prev = rest, rest = rest.next) {
113                 if (rest == container) {
114                     break;
115                 }
116             }
117 
118             if (rest == null) {
119                 throw new IllegalStateException("Didnt find " + container + " in parent " + container.parent);
120             }
121 
122             // Unlink this container from the parent's child list
123             if (prev == null) {
124                 container.parent.child = container.next;
125             } else {
126                 prev.next = container.next;
127             }
128 
129             container.next = null;
130             container.parent = null;
131         }
132 
133         // If we have a parent, link container into the parents child list
134         if (parentRef != null) {
135             container.parent = parentRef;
136             container.next = parentRef.child;
137             parentRef.child = container;
138         }
139     }
140 
141     /**
142      * Find the root set of all existing ThreadContainers
143      *
144      * @param idTable
145      * @return root the NntpThreadContainer representing the root node
146      */
147     private NntpThreadContainer findRootSet(final HashMap<String, NntpThreadContainer> idTable) {
148         final NntpThreadContainer root = new NntpThreadContainer();
149         for (final Map.Entry<String, NntpThreadContainer> entry : idTable.entrySet()) {
150             final NntpThreadContainer c = entry.getValue();
151             if (c.parent == null) {
152                 if (c.next != null) {
153                     throw new IllegalStateException("c.next is " + c.next.toString());
154                 }
155                 c.next = root.child;
156                 root.child = c;
157             }
158         }
159         return root;
160     }
161 
162     /**
163      * If any two members of the root set have the same subject, merge them. This is to attempt to accommodate messages without References: headers.
164      *
165      * @param root
166      */
167     private void gatherSubjects(final NntpThreadContainer root) {
168 
169         int count = 0;
170 
171         for (NntpThreadContainer c = root.child; c != null; c = c.next) {
172             count++;
173         }
174 
175         // TODO verify this will avoid rehashing
176         HashMap<String, NntpThreadContainer> subjectTable = new HashMap<>((int) (count * 1.2), (float) 0.9);
177         count = 0;
178 
179         for (NntpThreadContainer c = root.child; c != null; c = c.next) {
180             Threadable threadable = c.threadable;
181 
182             // No threadable? If so, it is a dummy node in the root set.
183             // Only root set members may be dummies, and they always have at least 2 kids
184             // Take the first kid as representative of the subject
185             if (threadable == null) {
186                 threadable = c.child.threadable;
187             }
188 
189             final String subj = threadable.simplifiedSubject();
190 
191             if (subj == null || subj.isEmpty()) {
192                 continue;
193             }
194 
195             final NntpThreadContainer old = subjectTable.get(subj);
196 
197             // Add this container to the table iff:
198             // - There exists no container with this subject
199             // - or this is a dummy container and the old one is not - the dummy one is
200             // more interesting as a root, so put it in the table instead
201             // - The container in the table has a "Re:" version of this subject, and
202             // this container has a non-"Re:" version of this subject. The non-"Re:" version
203             // is the more interesting of the two.
204             if (old == null || c.threadable == null && old.threadable != null
205                     || old.threadable != null && old.threadable.subjectIsReply() && c.threadable != null && !c.threadable.subjectIsReply()) {
206                 subjectTable.put(subj, c);
207                 count++;
208             }
209         }
210 
211         // If the table is empty, we're done
212         if (count == 0) {
213             return;
214         }
215 
216         // subjectTable is now populated with one entry for each subject which occurs in the
217         // root set. Iterate over the root set, and gather together the difference.
218         NntpThreadContainer prev;
219         NntpThreadContainer c;
220         NntpThreadContainer rest;
221         for (prev = null, c = root.child, rest = c.next; c != null; prev = c, c = rest, rest = rest == null ? null : rest.next) {
222             Threadable threadable = c.threadable;
223 
224             // is it a dummy node?
225             if (threadable == null) {
226                 threadable = c.child.threadable;
227             }
228 
229             final String subj = threadable.simplifiedSubject();
230 
231             // Don't thread together all subjectless messages
232             if (subj == null || subj.isEmpty()) {
233                 continue;
234             }
235 
236             final NntpThreadContainer old = subjectTable.get(subj);
237 
238             if (old == c) { // That's us
239                 continue;
240             }
241 
242             // We have now found another container in the root set with the same subject
243             // Remove the "second" message from the root set
244             if (prev == null) {
245                 root.child = c.next;
246             } else {
247                 prev.next = c.next;
248             }
249             c.next = null;
250 
251             if (old.threadable == null && c.threadable == null) {
252                 // both dummies - merge them
253                 NntpThreadContainer tail;
254                 for (tail = old.child; tail != null && tail.next != null; tail = tail.next) {
255                     // do nothing
256                 }
257 
258                 if (tail != null) { // protect against possible NPE
259                     tail.next = c.child;
260                 }
261 
262                 for (tail = c.child; tail != null; tail = tail.next) {
263                     tail.parent = old;
264                 }
265 
266                 c.child = null;
267             } else if (old.threadable == null || c.threadable != null && c.threadable.subjectIsReply() && !old.threadable.subjectIsReply()) {
268                 // Else if old is empty, or c has "Re:" and old does not ==> make this message a child of old
269                 c.parent = old;
270                 c.next = old.child;
271                 old.child = c;
272             } else {
273                 // else make the old and new messages be children of a new dummy container.
274                 // We create a new container object for old.msg and empty the old container
275                 final NntpThreadContainer newc = new NntpThreadContainer();
276                 newc.threadable = old.threadable;
277                 newc.child = old.child;
278 
279                 for (NntpThreadContainer tail = newc.child; tail != null; tail = tail.next) {
280                     tail.parent = newc;
281                 }
282 
283                 old.threadable = null;
284                 old.child = null;
285 
286                 c.parent = old;
287                 newc.parent = old;
288 
289                 // Old is now a dummy- give it 2 kids , c and newc
290                 old.child = c;
291                 c.next = newc;
292             }
293             // We've done a merge, so keep the same prev
294             c = prev;
295         }
296 
297         subjectTable.clear();
298         subjectTable = null;
299 
300     }
301 
302     /**
303      * Delete any empty or dummy ThreadContainers
304      *
305      * @param parent
306      */
307     private void pruneEmptyContainers(final NntpThreadContainer parent) {
308         NntpThreadContainer container;
309         NntpThreadContainer prev;
310         NntpThreadContainer next;
311         for (prev = null, container = parent.child, next = container.next; container != null; prev = container, container = next, next = container == null
312                 ? null
313                 : container.next) {
314 
315             // Is it empty and without any children? If so,delete it
316             if (container.threadable == null && container.child == null) {
317                 if (prev == null) {
318                     parent.child = container.next;
319                 } else {
320                     prev.next = container.next;
321                 }
322                 // Set container to prev so that prev keeps its same value the next time through the loop
323                 container = prev;
324                 // Else if empty, with kids, and (not at root or only one kid)
325             } else if (container.threadable == null && (container.parent != null || container.child.next == null)) {
326                 // We have an invalid/expired message with kids. Promote the kids to this level.
327                 NntpThreadContainer tail;
328                 final NntpThreadContainer kids = container.child;
329 
330                 // Remove this container and replace with 'kids'.
331                 if (prev == null) {
332                     parent.child = kids;
333                 } else {
334                     prev.next = kids;
335                 }
336 
337                 // Make each child's parent be this level's parent -> i.e. promote the children.
338                 // Make the last child's next point to this container's next
339                 // i.e. splice kids into the list in place of container
340                 for (tail = kids; tail.next != null; tail = tail.next) {
341                     tail.parent = container.parent;
342                 }
343 
344                 tail.parent = container.parent;
345                 tail.next = container.next;
346 
347                 // next currently points to the item after the inserted items in the chain - reset that, so we process the newly
348                 // promoted items next time round
349                 next = kids;
350 
351                 // Set container to prev so that prev keeps its same value the next time through the loop
352                 container = prev;
353             } else if (container.child != null) {
354                 // A real message , with kids
355                 // Iterate over the children
356                 pruneEmptyContainers(container);
357             }
358         }
359     }
360 
361     /**
362      * The client passes in a list of Iterable objects, and the Threader constructs a connected 'graph' of messages
363      *
364      * @param messages iterable of messages to thread, must not be empty
365      * @return null if messages == null or root.child == null or messages list is empty
366      * @since 3.0
367      */
368     public Threadable thread(final Iterable<? extends Threadable> messages) {
369         if (messages == null) {
370             return null;
371         }
372 
373         HashMap<String, NntpThreadContainer> idTable = new HashMap<>();
374 
375         // walk through each Threadable element
376         for (final Threadable t : messages) {
377             if (!t.isDummy()) {
378                 buildContainer(t, idTable);
379             }
380         }
381 
382         if (idTable.isEmpty()) {
383             return null;
384         }
385 
386         final NntpThreadContainer root = findRootSet(idTable);
387         idTable.clear();
388         idTable = null;
389 
390         pruneEmptyContainers(root);
391 
392         root.reverseChildren();
393         gatherSubjects(root);
394 
395         if (root.next != null) {
396             throw new IllegalStateException("root node has a next:" + root);
397         }
398 
399         for (NntpThreadContainer r = root.child; r != null; r = r.next) {
400             if (r.threadable == null) {
401                 r.threadable = r.child.threadable.makeDummy();
402             }
403         }
404 
405         final Threadable result = root.child == null ? null : root.child.threadable;
406         root.flush();
407 
408         return result;
409     }
410 
411     /**
412      * The client passes in a list of Threadable objects, and the Threader constructs a connected 'graph' of messages
413      *
414      * @param messages list of messages to thread, must not be empty
415      * @return null if messages == null or root.child == null or messages list is empty
416      * @since 2.2
417      */
418     public Threadable thread(final List<? extends Threadable> messages) {
419         return thread((Iterable<? extends Threadable>) messages);
420     }
421 
422     // DEPRECATED METHODS - for API compatibility only - DO NOT USE
423 
424     /**
425      * The client passes in an array of Threadable objects, and the Threader constructs a connected 'graph' of messages
426      *
427      * @param messages array of messages to thread, must not be empty
428      * @return null if messages == null or root.child == null or messages array is empty
429      * @deprecated (2.2) prefer {@link #thread(List)}
430      */
431     @Deprecated
432     public Threadable thread(final Threadable[] messages) {
433         if (messages == null) {
434             return null;
435         }
436         return thread(Arrays.asList(messages));
437     }
438 
439 }