View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.net.nntp;
19  
20  import java.util.Arrays;
21  import java.util.HashMap;
22  import java.util.List;
23  import java.util.Map;
24  
25  /**
26   * This is an implementation of a message threading algorithm, as originally devised by Zamie Zawinski.
27   * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details.
28   * For his Java implementation, see
29   * <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java">
30   * http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a>
31   */
32  public class Threader {
33  
34      /**
35       *
36       * @param threadable
37       * @param idTable
38       */
39      private void buildContainer(final Threadable threadable, final HashMap<String, NntpThreadContainer> idTable) {
40          String id = threadable.messageThreadId();
41          NntpThreadContainer container = idTable.get(id);
42          int bogusIdCount = 0;
43  
44          // A NntpThreadContainer exists for this id already. This should be a forward reference, but may
45          // be a duplicate id, in which case we will need to generate a bogus placeholder id
46          if (container != null) {
47              if (container.threadable != null) { // oops! duplicate ids...
48                  bogusIdCount++; // Avoid dead local store warning
49                  id = "<Bogus-id:" + bogusIdCount + ">";
50                  container = null;
51              } else {
52                  // The container just contained a forward reference to this message, so let's
53                  // fill in the threadable field of the container with this message
54                  container.threadable = threadable;
55              }
56          }
57  
58          // No container exists for that message Id. Create one and insert it into the hash table.
59          if (container == null) {
60              container = new NntpThreadContainer();
61              container.threadable = threadable;
62              idTable.put(id, container);
63          }
64  
65          // Iterate through all the references and create ThreadContainers for any references that
66          // don't have them.
67          NntpThreadContainer parentRef = null;
68          {
69              final String[] references = threadable.messageThreadReferences();
70              for (final String refString : references) {
71                  NntpThreadContainer ref = idTable.get(refString);
72  
73                  // if this id doesn't have a container, create one
74                  if (ref == null) {
75                      ref = new NntpThreadContainer();
76                      idTable.put(refString, ref);
77                  }
78  
79                  // Link references together in the order they appear in the References: header,
80                  // IF they don't have a parent already &&
81                  // IF it will not cause a circular reference
82                  if (parentRef != null && ref.parent == null && parentRef != ref && !ref.findChild(parentRef)) {
83                      // Link ref into the parent's child list
84                      ref.parent = parentRef;
85                      ref.next = parentRef.child;
86                      parentRef.child = ref;
87                  }
88                  parentRef = ref;
89              }
90          }
91  
92          // parentRef is now set to the container of the last element in the references field. make that
93          // be the parent of this container, unless doing so causes a circular reference
94          if (parentRef != null && (parentRef == container || container.findChild(parentRef))) {
95              parentRef = null;
96          }
97  
98          // if it has a parent already, it's because we saw this message in a References: field, and presumed
99          // a parent based on the other entries in that field. Now that we have the actual message, we can
100         // throw away the old parent and use this new one
101         if (container.parent != null) {
102             NntpThreadContainer rest, prev;
103 
104             for (prev = null, rest = container.parent.child; rest != null; prev = rest, rest = rest.next) {
105                 if (rest == container) {
106                     break;
107                 }
108             }
109 
110             if (rest == null) {
111                 throw new IllegalStateException("Didnt find " + container + " in parent " + container.parent);
112             }
113 
114             // Unlink this container from the parent's child list
115             if (prev == null) {
116                 container.parent.child = container.next;
117             } else {
118                 prev.next = container.next;
119             }
120 
121             container.next = null;
122             container.parent = null;
123         }
124 
125         // If we have a parent, link container into the parents child list
126         if (parentRef != null) {
127             container.parent = parentRef;
128             container.next = parentRef.child;
129             parentRef.child = container;
130         }
131     }
132 
133     /**
134      * Find the root set of all existing ThreadContainers
135      *
136      * @param idTable
137      * @return root the NntpThreadContainer representing the root node
138      */
139     private NntpThreadContainer findRootSet(final HashMap<String, NntpThreadContainer> idTable) {
140         final NntpThreadContainer root = new NntpThreadContainer();
141         for (final Map.Entry<String, NntpThreadContainer> entry : idTable.entrySet()) {
142             final NntpThreadContainer c = entry.getValue();
143             if (c.parent == null) {
144                 if (c.next != null) {
145                     throw new IllegalStateException("c.next is " + c.next.toString());
146                 }
147                 c.next = root.child;
148                 root.child = c;
149             }
150         }
151         return root;
152     }
153 
154     /**
155      * If any two members of the root set have the same subject, merge them. This is to attempt to accomodate messages without References: headers.
156      *
157      * @param root
158      */
159     private void gatherSubjects(final NntpThreadContainer root) {
160 
161         int count = 0;
162 
163         for (NntpThreadContainer c = root.child; c != null; c = c.next) {
164             count++;
165         }
166 
167         // TODO verify this will avoid rehashing
168         HashMap<String, NntpThreadContainer> subjectTable = new HashMap<>((int) (count * 1.2), (float) 0.9);
169         count = 0;
170 
171         for (NntpThreadContainer c = root.child; c != null; c = c.next) {
172             Threadable threadable = c.threadable;
173 
174             // No threadable? If so, it is a dummy node in the root set.
175             // Only root set members may be dummies, and they always have at least 2 kids
176             // Take the first kid as representative of the subject
177             if (threadable == null) {
178                 threadable = c.child.threadable;
179             }
180 
181             final String subj = threadable.simplifiedSubject();
182 
183             if (subj == null || subj.isEmpty()) {
184                 continue;
185             }
186 
187             final NntpThreadContainer old = subjectTable.get(subj);
188 
189             // Add this container to the table iff:
190             // - There exists no container with this subject
191             // - or this is a dummy container and the old one is not - the dummy one is
192             // more interesting as a root, so put it in the table instead
193             // - The container in the table has a "Re:" version of this subject, and
194             // this container has a non-"Re:" version of this subject. The non-"Re:" version
195             // is the more interesting of the two.
196             if (old == null || c.threadable == null && old.threadable != null
197                     || old.threadable != null && old.threadable.subjectIsReply() && c.threadable != null && !c.threadable.subjectIsReply()) {
198                 subjectTable.put(subj, c);
199                 count++;
200             }
201         }
202 
203         // If the table is empty, we're done
204         if (count == 0) {
205             return;
206         }
207 
208         // subjectTable is now populated with one entry for each subject which occurs in the
209         // root set. Iterate over the root set, and gather together the difference.
210         NntpThreadContainer prev, c, rest;
211         for (prev = null, c = root.child, rest = c.next; c != null; prev = c, c = rest, rest = rest == null ? null : rest.next) {
212             Threadable threadable = c.threadable;
213 
214             // is it a dummy node?
215             if (threadable == null) {
216                 threadable = c.child.threadable;
217             }
218 
219             final String subj = threadable.simplifiedSubject();
220 
221             // Don't thread together all subjectless messages
222             if (subj == null || subj.isEmpty()) {
223                 continue;
224             }
225 
226             final NntpThreadContainer old = subjectTable.get(subj);
227 
228             if (old == c) { // That's us
229                 continue;
230             }
231 
232             // We have now found another container in the root set with the same subject
233             // Remove the "second" message from the root set
234             if (prev == null) {
235                 root.child = c.next;
236             } else {
237                 prev.next = c.next;
238             }
239             c.next = null;
240 
241             if (old.threadable == null && c.threadable == null) {
242                 // both dummies - merge them
243                 NntpThreadContainer tail;
244                 for (tail = old.child; tail != null && tail.next != null; tail = tail.next) {
245                     // do nothing
246                 }
247 
248                 if (tail != null) { // protect against possible NPE
249                     tail.next = c.child;
250                 }
251 
252                 for (tail = c.child; tail != null; tail = tail.next) {
253                     tail.parent = old;
254                 }
255 
256                 c.child = null;
257             } else if (old.threadable == null || c.threadable != null && c.threadable.subjectIsReply() && !old.threadable.subjectIsReply()) {
258                 // Else if old is empty, or c has "Re:" and old does not ==> make this message a child of old
259                 c.parent = old;
260                 c.next = old.child;
261                 old.child = c;
262             } else {
263                 // else make the old and new messages be children of a new dummy container.
264                 // We create a new container object for old.msg and empty the old container
265                 final NntpThreadContainer newc = new NntpThreadContainer();
266                 newc.threadable = old.threadable;
267                 newc.child = old.child;
268 
269                 for (NntpThreadContainer tail = newc.child; tail != null; tail = tail.next) {
270                     tail.parent = newc;
271                 }
272 
273                 old.threadable = null;
274                 old.child = null;
275 
276                 c.parent = old;
277                 newc.parent = old;
278 
279                 // Old is now a dummy- give it 2 kids , c and newc
280                 old.child = c;
281                 c.next = newc;
282             }
283             // We've done a merge, so keep the same prev
284             c = prev;
285         }
286 
287         subjectTable.clear();
288         subjectTable = null;
289 
290     }
291 
292     /**
293      * Delete any empty or dummy ThreadContainers
294      *
295      * @param parent
296      */
297     private void pruneEmptyContainers(final NntpThreadContainer parent) {
298         NntpThreadContainer container, prev, next;
299         for (prev = null, container = parent.child, next = container.next; container != null; prev = container, container = next, next = container == null
300                 ? null
301                 : container.next) {
302 
303             // Is it empty and without any children? If so,delete it
304             if (container.threadable == null && container.child == null) {
305                 if (prev == null) {
306                     parent.child = container.next;
307                 } else {
308                     prev.next = container.next;
309                 }
310 
311                 // Set container to prev so that prev keeps its same value the next time through the loop
312                 container = prev;
313             }
314 
315             // Else if empty, with kids, and (not at root or only one kid)
316             else if (container.threadable == null && (container.parent != null || container.child.next == null)) {
317                 // We have an invalid/expired message with kids. Promote the kids to this level.
318                 NntpThreadContainer tail;
319                 final NntpThreadContainer kids = container.child;
320 
321                 // Remove this container and replace with 'kids'.
322                 if (prev == null) {
323                     parent.child = kids;
324                 } else {
325                     prev.next = kids;
326                 }
327 
328                 // Make each child's parent be this level's parent -> i.e. promote the children.
329                 // Make the last child's next point to this container's next
330                 // i.e. splice kids into the list in place of container
331                 for (tail = kids; tail.next != null; tail = tail.next) {
332                     tail.parent = container.parent;
333                 }
334 
335                 tail.parent = container.parent;
336                 tail.next = container.next;
337 
338                 // next currently points to the item after the inserted items in the chain - reset that, so we process the newly
339                 // promoted items next time round
340                 next = kids;
341 
342                 // Set container to prev so that prev keeps its same value the next time through the loop
343                 container = prev;
344             } else if (container.child != null) {
345                 // A real message , with kids
346                 // Iterate over the children
347                 pruneEmptyContainers(container);
348             }
349         }
350     }
351 
352     /**
353      * The client passes in a list of Iterable objects, and the Threader constructs a connected 'graph' of messages
354      *
355      * @param messages iterable of messages to thread, must not be empty
356      * @return null if messages == null or root.child == null or messages list is empty
357      * @since 3.0
358      */
359     public Threadable thread(final Iterable<? extends Threadable> messages) {
360         if (messages == null) {
361             return null;
362         }
363 
364         HashMap<String, NntpThreadContainer> idTable = new HashMap<>();
365 
366         // walk through each Threadable element
367         for (final Threadable t : messages) {
368             if (!t.isDummy()) {
369                 buildContainer(t, idTable);
370             }
371         }
372 
373         if (idTable.isEmpty()) {
374             return null;
375         }
376 
377         final NntpThreadContainer root = findRootSet(idTable);
378         idTable.clear();
379         idTable = null;
380 
381         pruneEmptyContainers(root);
382 
383         root.reverseChildren();
384         gatherSubjects(root);
385 
386         if (root.next != null) {
387             throw new IllegalStateException("root node has a next:" + root);
388         }
389 
390         for (NntpThreadContainer r = root.child; r != null; r = r.next) {
391             if (r.threadable == null) {
392                 r.threadable = r.child.threadable.makeDummy();
393             }
394         }
395 
396         final Threadable result = root.child == null ? null : root.child.threadable;
397         root.flush();
398 
399         return result;
400     }
401 
402     /**
403      * The client passes in a list of Threadable objects, and the Threader constructs a connected 'graph' of messages
404      *
405      * @param messages list of messages to thread, must not be empty
406      * @return null if messages == null or root.child == null or messages list is empty
407      * @since 2.2
408      */
409     public Threadable thread(final List<? extends Threadable> messages) {
410         return thread((Iterable<? extends Threadable>) messages);
411     }
412 
413     // DEPRECATED METHODS - for API compatibility only - DO NOT USE
414 
415     /**
416      * The client passes in an array of Threadable objects, and the Threader constructs a connected 'graph' of messages
417      *
418      * @param messages array of messages to thread, must not be empty
419      * @return null if messages == null or root.child == null or messages array is empty
420      * @deprecated (2.2) prefer {@link #thread(List)}
421      */
422     @Deprecated
423     public Threadable thread(final Threadable[] messages) {
424         if (messages == null) {
425             return null;
426         }
427         return thread(Arrays.asList(messages));
428     }
429 
430 }