001/* 002 * Copyright (C) 2017 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package com.google.common.graph; 018 019import static com.google.common.base.Preconditions.checkArgument; 020import static com.google.common.base.Preconditions.checkNotNull; 021 022import com.google.common.annotations.Beta; 023import com.google.common.collect.AbstractIterator; 024import com.google.common.collect.ImmutableSet; 025import com.google.errorprone.annotations.DoNotMock; 026import java.util.ArrayDeque; 027import java.util.Deque; 028import java.util.HashSet; 029import java.util.Iterator; 030import java.util.Set; 031import org.checkerframework.checker.nullness.qual.Nullable; 032 033/** 034 * An object that can traverse the nodes that are reachable from a specified (set of) start node(s) 035 * using a specified {@link SuccessorsFunction}. 036 * 037 * <p>There are two entry points for creating a {@code Traverser}: {@link 038 * #forTree(SuccessorsFunction)} and {@link #forGraph(SuccessorsFunction)}. You should choose one 039 * based on your answers to the following questions: 040 * 041 * <ol> 042 * <li>Is there only one path to any node that's reachable from any start node? (If so, the graph 043 * to be traversed is a tree or forest even if it is a subgraph of a graph which is neither.) 044 * <li>Are the node objects' implementations of {@code equals()}/{@code hashCode()} <a 045 * href="https://github.com/google/guava/wiki/GraphsExplained#non-recursiveness">recursive</a>? 046 * </ol> 047 * 048 * <p>If your answers are: 049 * 050 * <ul> 051 * <li>(1) "no" and (2) "no", use {@link #forGraph(SuccessorsFunction)}. 052 * <li>(1) "yes" and (2) "yes", use {@link #forTree(SuccessorsFunction)}. 053 * <li>(1) "yes" and (2) "no", you can use either, but {@code forTree()} will be more efficient. 054 * <li>(1) "no" and (2) "yes", <b><i>neither will work</i></b>, but if you transform your node 055 * objects into a non-recursive form, you can use {@code forGraph()}. 056 * </ul> 057 * 058 * @author Jens Nyman 059 * @param <N> Node parameter type 060 * @since 23.1 061 */ 062@Beta 063@DoNotMock( 064 "Call forGraph or forTree, passing a lambda or a Graph with the desired edges (built with" 065 + " GraphBuilder)") 066public abstract class Traverser<N> { 067 private final SuccessorsFunction<N> successorFunction; 068 069 private Traverser(SuccessorsFunction<N> successorFunction) { 070 this.successorFunction = checkNotNull(successorFunction); 071 } 072 073 /** 074 * Creates a new traverser for the given general {@code graph}. 075 * 076 * <p>Traversers created using this method are guaranteed to visit each node reachable from the 077 * start node(s) at most once. 078 * 079 * <p>If you know that no node in {@code graph} is reachable by more than one path from the start 080 * node(s), consider using {@link #forTree(SuccessorsFunction)} instead. 081 * 082 * <p><b>Performance notes</b> 083 * 084 * <ul> 085 * <li>Traversals require <i>O(n)</i> time (where <i>n</i> is the number of nodes reachable from 086 * the start node), assuming that the node objects have <i>O(1)</i> {@code equals()} and 087 * {@code hashCode()} implementations. (See the <a 088 * href="https://github.com/google/guava/wiki/GraphsExplained#elements-must-be-useable-as-map-keys"> 089 * notes on element objects</a> for more information.) 090 * <li>While traversing, the traverser will use <i>O(n)</i> space (where <i>n</i> is the number 091 * of nodes that have thus far been visited), plus <i>O(H)</i> space (where <i>H</i> is the 092 * number of nodes that have been seen but not yet visited, that is, the "horizon"). 093 * </ul> 094 * 095 * @param graph {@link SuccessorsFunction} representing a general graph that may have cycles. 096 */ 097 public static <N> Traverser<N> forGraph(final SuccessorsFunction<N> graph) { 098 return new Traverser<N>(graph) { 099 @Override 100 Traversal<N> newTraversal() { 101 return Traversal.inGraph(graph); 102 } 103 }; 104 } 105 106 /** 107 * Creates a new traverser for a directed acyclic graph that has at most one path from the start 108 * node(s) to any node reachable from the start node(s), and has no paths from any start node to 109 * any other start node, such as a tree or forest. 110 * 111 * <p>{@code forTree()} is especially useful (versus {@code forGraph()}) in cases where the data 112 * structure being traversed is, in addition to being a tree/forest, also defined <a 113 * href="https://github.com/google/guava/wiki/GraphsExplained#non-recursiveness">recursively</a>. 114 * This is because the {@code forTree()}-based implementations don't keep track of visited nodes, 115 * and therefore don't need to call `equals()` or `hashCode()` on the node objects; this saves 116 * both time and space versus traversing the same graph using {@code forGraph()}. 117 * 118 * <p>Providing a graph to be traversed for which there is more than one path from the start 119 * node(s) to any node may lead to: 120 * 121 * <ul> 122 * <li>Traversal not terminating (if the graph has cycles) 123 * <li>Nodes being visited multiple times (if multiple paths exist from any start node to any 124 * node reachable from any start node) 125 * </ul> 126 * 127 * <p><b>Performance notes</b> 128 * 129 * <ul> 130 * <li>Traversals require <i>O(n)</i> time (where <i>n</i> is the number of nodes reachable from 131 * the start node). 132 * <li>While traversing, the traverser will use <i>O(H)</i> space (where <i>H</i> is the number 133 * of nodes that have been seen but not yet visited, that is, the "horizon"). 134 * </ul> 135 * 136 * <p><b>Examples</b> (all edges are directed facing downwards) 137 * 138 * <p>The graph below would be valid input with start nodes of {@code a, f, c}. However, if {@code 139 * b} were <i>also</i> a start node, then there would be multiple paths to reach {@code e} and 140 * {@code h}. 141 * 142 * <pre>{@code 143 * a b c 144 * / \ / \ | 145 * / \ / \ | 146 * d e f g 147 * | 148 * | 149 * h 150 * }</pre> 151 * 152 * <p>. 153 * 154 * <p>The graph below would be a valid input with start nodes of {@code a, f}. However, if {@code 155 * b} were a start node, there would be multiple paths to {@code f}. 156 * 157 * <pre>{@code 158 * a b 159 * / \ / \ 160 * / \ / \ 161 * c d e 162 * \ / 163 * \ / 164 * f 165 * }</pre> 166 * 167 * <p><b>Note on binary trees</b> 168 * 169 * <p>This method can be used to traverse over a binary tree. Given methods {@code 170 * leftChild(node)} and {@code rightChild(node)}, this method can be called as 171 * 172 * <pre>{@code 173 * Traverser.forTree(node -> ImmutableList.of(leftChild(node), rightChild(node))); 174 * }</pre> 175 * 176 * @param tree {@link SuccessorsFunction} representing a directed acyclic graph that has at most 177 * one path between any two nodes 178 */ 179 public static <N> Traverser<N> forTree(final SuccessorsFunction<N> tree) { 180 if (tree instanceof BaseGraph) { 181 checkArgument(((BaseGraph<?>) tree).isDirected(), "Undirected graphs can never be trees."); 182 } 183 if (tree instanceof Network) { 184 checkArgument(((Network<?, ?>) tree).isDirected(), "Undirected networks can never be trees."); 185 } 186 return new Traverser<N>(tree) { 187 @Override 188 Traversal<N> newTraversal() { 189 return Traversal.inTree(tree); 190 } 191 }; 192 } 193 194 /** 195 * Returns an unmodifiable {@code Iterable} over the nodes reachable from {@code startNode}, in 196 * the order of a breadth-first traversal. That is, all the nodes of depth 0 are returned, then 197 * depth 1, then 2, and so on. 198 * 199 * <p><b>Example:</b> The following graph with {@code startNode} {@code a} would return nodes in 200 * the order {@code abcdef} (assuming successors are returned in alphabetical order). 201 * 202 * <pre>{@code 203 * b ---- a ---- d 204 * | | 205 * | | 206 * e ---- c ---- f 207 * }</pre> 208 * 209 * <p>The behavior of this method is undefined if the nodes, or the topology of the graph, change 210 * while iteration is in progress. 211 * 212 * <p>The returned {@code Iterable} can be iterated over multiple times. Every iterator will 213 * compute its next element on the fly. It is thus possible to limit the traversal to a certain 214 * number of nodes as follows: 215 * 216 * <pre>{@code 217 * Iterables.limit(Traverser.forGraph(graph).breadthFirst(node), maxNumberOfNodes); 218 * }</pre> 219 * 220 * <p>See <a href="https://en.wikipedia.org/wiki/Breadth-first_search">Wikipedia</a> for more 221 * info. 222 * 223 * @throws IllegalArgumentException if {@code startNode} is not an element of the graph 224 */ 225 public final Iterable<N> breadthFirst(N startNode) { 226 return breadthFirst(ImmutableSet.of(startNode)); 227 } 228 229 /** 230 * Returns an unmodifiable {@code Iterable} over the nodes reachable from any of the {@code 231 * startNodes}, in the order of a breadth-first traversal. This is equivalent to a breadth-first 232 * traversal of a graph with an additional root node whose successors are the listed {@code 233 * startNodes}. 234 * 235 * @throws IllegalArgumentException if any of {@code startNodes} is not an element of the graph 236 * @see #breadthFirst(Object) 237 * @since 24.1 238 */ 239 public final Iterable<N> breadthFirst(Iterable<? extends N> startNodes) { 240 final ImmutableSet<N> validated = validate(startNodes); 241 return new Iterable<N>() { 242 @Override 243 public Iterator<N> iterator() { 244 return newTraversal().breadthFirst(validated.iterator()); 245 } 246 }; 247 } 248 249 /** 250 * Returns an unmodifiable {@code Iterable} over the nodes reachable from {@code startNode}, in 251 * the order of a depth-first pre-order traversal. "Pre-order" implies that nodes appear in the 252 * {@code Iterable} in the order in which they are first visited. 253 * 254 * <p><b>Example:</b> The following graph with {@code startNode} {@code a} would return nodes in 255 * the order {@code abecfd} (assuming successors are returned in alphabetical order). 256 * 257 * <pre>{@code 258 * b ---- a ---- d 259 * | | 260 * | | 261 * e ---- c ---- f 262 * }</pre> 263 * 264 * <p>The behavior of this method is undefined if the nodes, or the topology of the graph, change 265 * while iteration is in progress. 266 * 267 * <p>The returned {@code Iterable} can be iterated over multiple times. Every iterator will 268 * compute its next element on the fly. It is thus possible to limit the traversal to a certain 269 * number of nodes as follows: 270 * 271 * <pre>{@code 272 * Iterables.limit( 273 * Traverser.forGraph(graph).depthFirstPreOrder(node), maxNumberOfNodes); 274 * }</pre> 275 * 276 * <p>See <a href="https://en.wikipedia.org/wiki/Depth-first_search">Wikipedia</a> for more info. 277 * 278 * @throws IllegalArgumentException if {@code startNode} is not an element of the graph 279 */ 280 public final Iterable<N> depthFirstPreOrder(N startNode) { 281 return depthFirstPreOrder(ImmutableSet.of(startNode)); 282 } 283 284 /** 285 * Returns an unmodifiable {@code Iterable} over the nodes reachable from any of the {@code 286 * startNodes}, in the order of a depth-first pre-order traversal. This is equivalent to a 287 * depth-first pre-order traversal of a graph with an additional root node whose successors are 288 * the listed {@code startNodes}. 289 * 290 * @throws IllegalArgumentException if any of {@code startNodes} is not an element of the graph 291 * @see #depthFirstPreOrder(Object) 292 * @since 24.1 293 */ 294 public final Iterable<N> depthFirstPreOrder(Iterable<? extends N> startNodes) { 295 final ImmutableSet<N> validated = validate(startNodes); 296 return new Iterable<N>() { 297 @Override 298 public Iterator<N> iterator() { 299 return newTraversal().preOrder(validated.iterator()); 300 } 301 }; 302 } 303 304 /** 305 * Returns an unmodifiable {@code Iterable} over the nodes reachable from {@code startNode}, in 306 * the order of a depth-first post-order traversal. "Post-order" implies that nodes appear in the 307 * {@code Iterable} in the order in which they are visited for the last time. 308 * 309 * <p><b>Example:</b> The following graph with {@code startNode} {@code a} would return nodes in 310 * the order {@code fcebda} (assuming successors are returned in alphabetical order). 311 * 312 * <pre>{@code 313 * b ---- a ---- d 314 * | | 315 * | | 316 * e ---- c ---- f 317 * }</pre> 318 * 319 * <p>The behavior of this method is undefined if the nodes, or the topology of the graph, change 320 * while iteration is in progress. 321 * 322 * <p>The returned {@code Iterable} can be iterated over multiple times. Every iterator will 323 * compute its next element on the fly. It is thus possible to limit the traversal to a certain 324 * number of nodes as follows: 325 * 326 * <pre>{@code 327 * Iterables.limit( 328 * Traverser.forGraph(graph).depthFirstPostOrder(node), maxNumberOfNodes); 329 * }</pre> 330 * 331 * <p>See <a href="https://en.wikipedia.org/wiki/Depth-first_search">Wikipedia</a> for more info. 332 * 333 * @throws IllegalArgumentException if {@code startNode} is not an element of the graph 334 */ 335 public final Iterable<N> depthFirstPostOrder(N startNode) { 336 return depthFirstPostOrder(ImmutableSet.of(startNode)); 337 } 338 339 /** 340 * Returns an unmodifiable {@code Iterable} over the nodes reachable from any of the {@code 341 * startNodes}, in the order of a depth-first post-order traversal. This is equivalent to a 342 * depth-first post-order traversal of a graph with an additional root node whose successors are 343 * the listed {@code startNodes}. 344 * 345 * @throws IllegalArgumentException if any of {@code startNodes} is not an element of the graph 346 * @see #depthFirstPostOrder(Object) 347 * @since 24.1 348 */ 349 public final Iterable<N> depthFirstPostOrder(Iterable<? extends N> startNodes) { 350 final ImmutableSet<N> validated = validate(startNodes); 351 return new Iterable<N>() { 352 @Override 353 public Iterator<N> iterator() { 354 return newTraversal().postOrder(validated.iterator()); 355 } 356 }; 357 } 358 359 abstract Traversal<N> newTraversal(); 360 361 @SuppressWarnings("CheckReturnValue") 362 private ImmutableSet<N> validate(Iterable<? extends N> startNodes) { 363 ImmutableSet<N> copy = ImmutableSet.copyOf(startNodes); 364 for (N node : copy) { 365 successorFunction.successors(node); // Will throw if node doesn't exist 366 } 367 return copy; 368 } 369 370 /** 371 * Abstracts away the difference between traversing a graph vs. a tree. For a tree, we just take 372 * the next element from the next non-empty iterator; for graph, we need to loop through the next 373 * non-empty iterator to find first unvisited node. 374 */ 375 private abstract static class Traversal<N> { 376 final SuccessorsFunction<N> successorFunction; 377 378 Traversal(SuccessorsFunction<N> successorFunction) { 379 this.successorFunction = successorFunction; 380 } 381 382 static <N> Traversal<N> inGraph(SuccessorsFunction<N> graph) { 383 final Set<N> visited = new HashSet<>(); 384 return new Traversal<N>(graph) { 385 @Override 386 N visitNext(Deque<Iterator<? extends N>> horizon) { 387 Iterator<? extends N> top = horizon.getFirst(); 388 while (top.hasNext()) { 389 N element = checkNotNull(top.next()); 390 if (visited.add(element)) { 391 return element; 392 } 393 } 394 horizon.removeFirst(); 395 return null; 396 } 397 }; 398 } 399 400 static <N> Traversal<N> inTree(SuccessorsFunction<N> tree) { 401 return new Traversal<N>(tree) { 402 @Override 403 N visitNext(Deque<Iterator<? extends N>> horizon) { 404 Iterator<? extends N> top = horizon.getFirst(); 405 if (top.hasNext()) { 406 return checkNotNull(top.next()); 407 } 408 horizon.removeFirst(); 409 return null; 410 } 411 }; 412 } 413 414 final Iterator<N> breadthFirst(Iterator<? extends N> startNodes) { 415 return topDown(startNodes, InsertionOrder.BACK); 416 } 417 418 final Iterator<N> preOrder(Iterator<? extends N> startNodes) { 419 return topDown(startNodes, InsertionOrder.FRONT); 420 } 421 422 /** 423 * In top-down traversal, an ancestor node is always traversed before any of its descendant 424 * nodes. The traversal order among descendant nodes (particularly aunts and nieces) are 425 * determined by the {@code InsertionOrder} parameter: nieces are placed at the FRONT before 426 * aunts for pre-order; while in BFS they are placed at the BACK after aunts. 427 */ 428 private Iterator<N> topDown(Iterator<? extends N> startNodes, final InsertionOrder order) { 429 final Deque<Iterator<? extends N>> horizon = new ArrayDeque<>(); 430 horizon.add(startNodes); 431 return new AbstractIterator<N>() { 432 @Override 433 protected N computeNext() { 434 do { 435 N next = visitNext(horizon); 436 if (next != null) { 437 Iterator<? extends N> successors = successorFunction.successors(next).iterator(); 438 if (successors.hasNext()) { 439 // BFS: horizon.addLast(successors) 440 // Pre-order: horizon.addFirst(successors) 441 order.insertInto(horizon, successors); 442 } 443 return next; 444 } 445 } while (!horizon.isEmpty()); 446 return endOfData(); 447 } 448 }; 449 } 450 451 final Iterator<N> postOrder(Iterator<? extends N> startNodes) { 452 final Deque<N> ancestorStack = new ArrayDeque<>(); 453 final Deque<Iterator<? extends N>> horizon = new ArrayDeque<>(); 454 horizon.add(startNodes); 455 return new AbstractIterator<N>() { 456 @Override 457 protected N computeNext() { 458 for (N next = visitNext(horizon); next != null; next = visitNext(horizon)) { 459 Iterator<? extends N> successors = successorFunction.successors(next).iterator(); 460 if (!successors.hasNext()) { 461 return next; 462 } 463 horizon.addFirst(successors); 464 ancestorStack.push(next); 465 } 466 return ancestorStack.isEmpty() ? endOfData() : ancestorStack.pop(); 467 } 468 }; 469 } 470 471 /** 472 * Visits the next node from the top iterator of {@code horizon} and returns the visited node. 473 * Null is returned to indicate reaching the end of the top iterator. 474 * 475 * <p>For example, if horizon is {@code [[a, b], [c, d], [e]]}, {@code visitNext()} will return 476 * {@code [a, b, null, c, d, null, e, null]} sequentially, encoding the topological structure. 477 * (Note, however, that the callers of {@code visitNext()} often insert additional iterators 478 * into {@code horizon} between calls to {@code visitNext()}. This causes them to receive 479 * additional values interleaved with those shown above.) 480 */ 481 @Nullable 482 abstract N visitNext(Deque<Iterator<? extends N>> horizon); 483 } 484 485 /** Poor man's method reference for {@code Deque::addFirst} and {@code Deque::addLast}. */ 486 private enum InsertionOrder { 487 FRONT { 488 @Override 489 <T> void insertInto(Deque<T> deque, T value) { 490 deque.addFirst(value); 491 } 492 }, 493 BACK { 494 @Override 495 <T> void insertInto(Deque<T> deque, T value) { 496 deque.addLast(value); 497 } 498 }; 499 500 abstract <T> void insertInto(Deque<T> deque, T value); 501 } 502}