001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.net.nntp;
019
020import java.util.Arrays;
021import java.util.HashMap;
022import java.util.List;
023import java.util.Map;
024
025/**
026 * This is an implementation of a message threading algorithm, as originally devised by Zamie Zawinski.
027 * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details.
028 * For his Java implementation, see
029 * <a href="https://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java">
030 * https://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a>
031 */
032public class Threader {
033
034    /**
035     * Constructs a new instance.
036     */
037    public Threader() {
038        // empty
039    }
040
041    /**
042     *
043     * @param threadable
044     * @param idTable
045     */
046    private void buildContainer(final Threadable threadable, final HashMap<String, NntpThreadContainer> idTable) {
047        String id = threadable.messageThreadId();
048        NntpThreadContainer container = idTable.get(id);
049        int bogusIdCount = 0;
050
051        // A NntpThreadContainer exists for this id already. This should be a forward reference, but may
052        // be a duplicate id, in which case we will need to generate a bogus placeholder id
053        if (container != null) {
054            if (container.threadable != null) { // oops! duplicate ids...
055                bogusIdCount++; // Avoid dead local store warning
056                id = "<Bogus-id:" + bogusIdCount + ">";
057                container = null;
058            } else {
059                // The container just contained a forward reference to this message, so let's
060                // fill in the threadable field of the container with this message
061                container.threadable = threadable;
062            }
063        }
064
065        // No container exists for that message Id. Create one and insert it into the hash table.
066        if (container == null) {
067            container = new NntpThreadContainer();
068            container.threadable = threadable;
069            idTable.put(id, container);
070        }
071
072        // Iterate through all the references and create ThreadContainers for any references that
073        // don't have them.
074        NntpThreadContainer parentRef = null;
075        {
076            final String[] references = threadable.messageThreadReferences();
077            for (final String refString : references) {
078                NntpThreadContainer ref = idTable.get(refString);
079
080                // if this id doesn't have a container, create one
081                if (ref == null) {
082                    ref = new NntpThreadContainer();
083                    idTable.put(refString, ref);
084                }
085
086                // Link references together in the order they appear in the References: header,
087                // IF they don't have a parent already &&
088                // IF it will not cause a circular reference
089                if (parentRef != null && ref.parent == null && parentRef != ref && !ref.findChild(parentRef)) {
090                    // Link ref into the parent's child list
091                    ref.parent = parentRef;
092                    ref.next = parentRef.child;
093                    parentRef.child = ref;
094                }
095                parentRef = ref;
096            }
097        }
098
099        // parentRef is now set to the container of the last element in the references field. make that
100        // be the parent of this container, unless doing so causes a circular reference
101        if (parentRef != null && (parentRef == container || container.findChild(parentRef))) {
102            parentRef = null;
103        }
104
105        // if it has a parent already, it's because we saw this message in a References: field, and presumed
106        // a parent based on the other entries in that field. Now that we have the actual message, we can
107        // throw away the old parent and use this new one
108        if (container.parent != null) {
109            NntpThreadContainer rest;
110            NntpThreadContainer prev;
111
112            for (prev = null, rest = container.parent.child; rest != null; prev = rest, rest = rest.next) {
113                if (rest == container) {
114                    break;
115                }
116            }
117
118            if (rest == null) {
119                throw new IllegalStateException("Didnt find " + container + " in parent " + container.parent);
120            }
121
122            // Unlink this container from the parent's child list
123            if (prev == null) {
124                container.parent.child = container.next;
125            } else {
126                prev.next = container.next;
127            }
128
129            container.next = null;
130            container.parent = null;
131        }
132
133        // If we have a parent, link container into the parents child list
134        if (parentRef != null) {
135            container.parent = parentRef;
136            container.next = parentRef.child;
137            parentRef.child = container;
138        }
139    }
140
141    /**
142     * Find the root set of all existing ThreadContainers
143     *
144     * @param idTable
145     * @return root the NntpThreadContainer representing the root node
146     */
147    private NntpThreadContainer findRootSet(final HashMap<String, NntpThreadContainer> idTable) {
148        final NntpThreadContainer root = new NntpThreadContainer();
149        for (final Map.Entry<String, NntpThreadContainer> entry : idTable.entrySet()) {
150            final NntpThreadContainer c = entry.getValue();
151            if (c.parent == null) {
152                if (c.next != null) {
153                    throw new IllegalStateException("c.next is " + c.next.toString());
154                }
155                c.next = root.child;
156                root.child = c;
157            }
158        }
159        return root;
160    }
161
162    /**
163     * If any two members of the root set have the same subject, merge them. This is to attempt to accommodate messages without References: headers.
164     *
165     * @param root
166     */
167    private void gatherSubjects(final NntpThreadContainer root) {
168
169        int count = 0;
170
171        for (NntpThreadContainer c = root.child; c != null; c = c.next) {
172            count++;
173        }
174
175        // TODO verify this will avoid rehashing
176        HashMap<String, NntpThreadContainer> subjectTable = new HashMap<>((int) (count * 1.2), (float) 0.9);
177        count = 0;
178
179        for (NntpThreadContainer c = root.child; c != null; c = c.next) {
180            Threadable threadable = c.threadable;
181
182            // No threadable? If so, it is a dummy node in the root set.
183            // Only root set members may be dummies, and they always have at least 2 kids
184            // Take the first kid as representative of the subject
185            if (threadable == null) {
186                threadable = c.child.threadable;
187            }
188
189            final String subj = threadable.simplifiedSubject();
190
191            if (subj == null || subj.isEmpty()) {
192                continue;
193            }
194
195            final NntpThreadContainer old = subjectTable.get(subj);
196
197            // Add this container to the table iff:
198            // - There exists no container with this subject
199            // - or this is a dummy container and the old one is not - the dummy one is
200            // more interesting as a root, so put it in the table instead
201            // - The container in the table has a "Re:" version of this subject, and
202            // this container has a non-"Re:" version of this subject. The non-"Re:" version
203            // is the more interesting of the two.
204            if (old == null || c.threadable == null && old.threadable != null
205                    || old.threadable != null && old.threadable.subjectIsReply() && c.threadable != null && !c.threadable.subjectIsReply()) {
206                subjectTable.put(subj, c);
207                count++;
208            }
209        }
210
211        // If the table is empty, we're done
212        if (count == 0) {
213            return;
214        }
215
216        // subjectTable is now populated with one entry for each subject which occurs in the
217        // root set. Iterate over the root set, and gather together the difference.
218        NntpThreadContainer prev;
219        NntpThreadContainer c;
220        NntpThreadContainer rest;
221        for (prev = null, c = root.child, rest = c.next; c != null; prev = c, c = rest, rest = rest == null ? null : rest.next) {
222            Threadable threadable = c.threadable;
223
224            // is it a dummy node?
225            if (threadable == null) {
226                threadable = c.child.threadable;
227            }
228
229            final String subj = threadable.simplifiedSubject();
230
231            // Don't thread together all subjectless messages
232            if (subj == null || subj.isEmpty()) {
233                continue;
234            }
235
236            final NntpThreadContainer old = subjectTable.get(subj);
237
238            if (old == c) { // That's us
239                continue;
240            }
241
242            // We have now found another container in the root set with the same subject
243            // Remove the "second" message from the root set
244            if (prev == null) {
245                root.child = c.next;
246            } else {
247                prev.next = c.next;
248            }
249            c.next = null;
250
251            if (old.threadable == null && c.threadable == null) {
252                // both dummies - merge them
253                NntpThreadContainer tail;
254                for (tail = old.child; tail != null && tail.next != null; tail = tail.next) {
255                    // do nothing
256                }
257
258                if (tail != null) { // protect against possible NPE
259                    tail.next = c.child;
260                }
261
262                for (tail = c.child; tail != null; tail = tail.next) {
263                    tail.parent = old;
264                }
265
266                c.child = null;
267            } else if (old.threadable == null || c.threadable != null && c.threadable.subjectIsReply() && !old.threadable.subjectIsReply()) {
268                // Else if old is empty, or c has "Re:" and old does not ==> make this message a child of old
269                c.parent = old;
270                c.next = old.child;
271                old.child = c;
272            } else {
273                // else make the old and new messages be children of a new dummy container.
274                // We create a new container object for old.msg and empty the old container
275                final NntpThreadContainer newc = new NntpThreadContainer();
276                newc.threadable = old.threadable;
277                newc.child = old.child;
278
279                for (NntpThreadContainer tail = newc.child; tail != null; tail = tail.next) {
280                    tail.parent = newc;
281                }
282
283                old.threadable = null;
284                old.child = null;
285
286                c.parent = old;
287                newc.parent = old;
288
289                // Old is now a dummy- give it 2 kids , c and newc
290                old.child = c;
291                c.next = newc;
292            }
293            // We've done a merge, so keep the same prev
294            c = prev;
295        }
296
297        subjectTable.clear();
298        subjectTable = null;
299
300    }
301
302    /**
303     * Delete any empty or dummy ThreadContainers
304     *
305     * @param parent
306     */
307    private void pruneEmptyContainers(final NntpThreadContainer parent) {
308        NntpThreadContainer container;
309        NntpThreadContainer prev;
310        NntpThreadContainer next;
311        for (prev = null, container = parent.child, next = container.next; container != null; prev = container, container = next, next = container == null
312                ? null
313                : container.next) {
314
315            // Is it empty and without any children? If so,delete it
316            if (container.threadable == null && container.child == null) {
317                if (prev == null) {
318                    parent.child = container.next;
319                } else {
320                    prev.next = container.next;
321                }
322                // Set container to prev so that prev keeps its same value the next time through the loop
323                container = prev;
324                // Else if empty, with kids, and (not at root or only one kid)
325            } else if (container.threadable == null && (container.parent != null || container.child.next == null)) {
326                // We have an invalid/expired message with kids. Promote the kids to this level.
327                NntpThreadContainer tail;
328                final NntpThreadContainer kids = container.child;
329
330                // Remove this container and replace with 'kids'.
331                if (prev == null) {
332                    parent.child = kids;
333                } else {
334                    prev.next = kids;
335                }
336
337                // Make each child's parent be this level's parent -> i.e. promote the children.
338                // Make the last child's next point to this container's next
339                // i.e. splice kids into the list in place of container
340                for (tail = kids; tail.next != null; tail = tail.next) {
341                    tail.parent = container.parent;
342                }
343
344                tail.parent = container.parent;
345                tail.next = container.next;
346
347                // next currently points to the item after the inserted items in the chain - reset that, so we process the newly
348                // promoted items next time round
349                next = kids;
350
351                // Set container to prev so that prev keeps its same value the next time through the loop
352                container = prev;
353            } else if (container.child != null) {
354                // A real message , with kids
355                // Iterate over the children
356                pruneEmptyContainers(container);
357            }
358        }
359    }
360
361    /**
362     * The client passes in a list of Iterable objects, and the Threader constructs a connected 'graph' of messages
363     *
364     * @param messages iterable of messages to thread, must not be empty
365     * @return null if messages == null or root.child == null or messages list is empty
366     * @since 3.0
367     */
368    public Threadable thread(final Iterable<? extends Threadable> messages) {
369        if (messages == null) {
370            return null;
371        }
372
373        HashMap<String, NntpThreadContainer> idTable = new HashMap<>();
374
375        // walk through each Threadable element
376        for (final Threadable t : messages) {
377            if (!t.isDummy()) {
378                buildContainer(t, idTable);
379            }
380        }
381
382        if (idTable.isEmpty()) {
383            return null;
384        }
385
386        final NntpThreadContainer root = findRootSet(idTable);
387        idTable.clear();
388        idTable = null;
389
390        pruneEmptyContainers(root);
391
392        root.reverseChildren();
393        gatherSubjects(root);
394
395        if (root.next != null) {
396            throw new IllegalStateException("root node has a next:" + root);
397        }
398
399        for (NntpThreadContainer r = root.child; r != null; r = r.next) {
400            if (r.threadable == null) {
401                r.threadable = r.child.threadable.makeDummy();
402            }
403        }
404
405        final Threadable result = root.child == null ? null : root.child.threadable;
406        root.flush();
407
408        return result;
409    }
410
411    /**
412     * The client passes in a list of Threadable objects, and the Threader constructs a connected 'graph' of messages
413     *
414     * @param messages list of messages to thread, must not be empty
415     * @return null if messages == null or root.child == null or messages list is empty
416     * @since 2.2
417     */
418    public Threadable thread(final List<? extends Threadable> messages) {
419        return thread((Iterable<? extends Threadable>) messages);
420    }
421
422    // DEPRECATED METHODS - for API compatibility only - DO NOT USE
423
424    /**
425     * The client passes in an array of Threadable objects, and the Threader constructs a connected 'graph' of messages
426     *
427     * @param messages array of messages to thread, must not be empty
428     * @return null if messages == null or root.child == null or messages array is empty
429     * @deprecated (2.2) prefer {@link #thread(List)}
430     */
431    @Deprecated
432    public Threadable thread(final Threadable[] messages) {
433        if (messages == null) {
434            return null;
435        }
436        return thread(Arrays.asList(messages));
437    }
438
439}