|
1 |
| -use crate::graph::{DirectedGraph, NumEdges, Successors}; |
| 1 | +use crate::graph::{DirectedGraph, NumEdges, Predecessors, Successors}; |
2 | 2 | use rustc_index::{Idx, IndexVec};
|
3 | 3 |
|
4 | 4 | #[cfg(test)]
|
5 | 5 | mod tests;
|
6 | 6 |
|
7 |
| -pub struct VecGraph<N: Idx> { |
8 |
| - /// Maps from a given node to an index where the set of successors |
9 |
| - /// for that node starts. The index indexes into the `edges` |
10 |
| - /// vector. To find the range for a given node, we look up the |
11 |
| - /// start for that node and then the start for the next node |
12 |
| - /// (i.e., with an index 1 higher) and get the range between the |
13 |
| - /// two. This vector always has an extra entry so that this works |
14 |
| - /// even for the max element. |
| 7 | +/// A directed graph, efficient for cases where node indices are pre-existing. |
| 8 | +/// |
| 9 | +/// If `BR` is true, the graph will store back-references, allowing you to get predecessors. |
| 10 | +pub struct VecGraph<N: Idx, const BR: bool = false> { |
| 11 | + // This is basically a `HashMap<N, (Vec<N>, If<BR, Vec<N>>)>` -- a map from a node index, to |
| 12 | + // a list of targets of outgoing edges and (if enabled) a list of sources of incoming edges. |
| 13 | + // |
| 14 | + // However, it is condensed into two arrays as an optimization. |
| 15 | + // |
| 16 | + // `node_starts[n]` is the start of the list of targets of outgoing edges for node `n`. |
| 17 | + // So you can get node's successors with `edge_targets[node_starts[n]..node_starts[n + 1]]`. |
| 18 | + // |
| 19 | + // If `BR` is true (back references are enabled), then `node_starts[n + edge_count]` is the |
| 20 | + // start of the list of *sources* of incoming edges. You can get predecessors of a node |
| 21 | + // similarly to its successors but offsetting by `edge_count`. `edge_count` is |
| 22 | + // `edge_targets.len()/2` (again, in case BR is true) because half of the vec is back refs. |
| 23 | + // |
| 24 | + // All of this might be confusing, so here is an example graph and its representation: |
| 25 | + // |
| 26 | + // n3 ----+ |
| 27 | + // ^ | (if BR = true) |
| 28 | + // | v outgoing edges incoming edges |
| 29 | + // n0 -> n1 -> n2 ______________ __________________ |
| 30 | + // / \ / \ |
| 31 | + // node indices[1]: n0, n1, n2, n3, n0, n1, n2, n3, n/a |
| 32 | + // vec indices: n0, n1, n2, n3, n4, n5, n6, n7, n8 |
| 33 | + // node_starts: [0, 1, 3, 4 4, 4, 5, 7, 8] |
| 34 | + // | | | | | | | | | |
| 35 | + // | | +---+ +---+ | +---+ | |
| 36 | + // | | | | | | | |
| 37 | + // v v v v v v v |
| 38 | + // edge_targets: [n1, n2, n3, n2 n0, n1, n3, n1] |
| 39 | + // / \____/ | | \____/ \ |
| 40 | + // n0->n1 / | | \ n3<-n1 |
| 41 | + // / n3->n2 [2] n1<-n0 [2] \ |
| 42 | + // n1->n2, n1->n3 n2<-n1, n2<-n3 |
| 43 | + // |
| 44 | + // The incoming edges are basically stored in the same way as outgoing edges, but offset and |
| 45 | + // the graph they store is the inverse of the original. Last index in the `node_starts` array |
| 46 | + // always points to one-past-the-end, so that we don't need to bound check `node_starts[n + 1]` |
| 47 | + // |
| 48 | + // [1]: "node indices" are the indices a user of `VecGraph` might use, |
| 49 | + // note that they are different from "vec indices", |
| 50 | + // which are the real indices you need to index `node_starts` |
| 51 | + // |
| 52 | + // [2]: Note that even though n2 also points to here, |
| 53 | + // the next index also points here, so n2 has no |
| 54 | + // successors (`edge_targets[3..3] = []`). |
| 55 | + // Similarly with n0 and incoming edges |
| 56 | + // |
| 57 | + // If this is still confusing... then sorry :( |
| 58 | + // |
| 59 | + /// Indices into `edge_targets` that signify a start of list of edges. |
15 | 60 | node_starts: IndexVec<N, usize>,
|
16 | 61 |
|
| 62 | + /// Targets (or sources for back refs) of edges |
17 | 63 | edge_targets: Vec<N>,
|
18 | 64 | }
|
19 | 65 |
|
20 |
| -impl<N: Idx + Ord> VecGraph<N> { |
| 66 | +impl<N: Idx + Ord, const BR: bool> VecGraph<N, BR> { |
21 | 67 | pub fn new(num_nodes: usize, mut edge_pairs: Vec<(N, N)>) -> Self {
|
| 68 | + let num_edges = edge_pairs.len(); |
| 69 | + |
| 70 | + let nodes_cap = match BR { |
| 71 | + // +1 for special entry at the end, pointing one past the end of `edge_targets` |
| 72 | + false => num_nodes + 1, |
| 73 | + // *2 for back references |
| 74 | + true => (num_nodes * 2) + 1, |
| 75 | + }; |
| 76 | + |
| 77 | + let edges_cap = match BR { |
| 78 | + false => num_edges, |
| 79 | + // *2 for back references |
| 80 | + true => num_edges * 2, |
| 81 | + }; |
| 82 | + |
| 83 | + let mut node_starts = IndexVec::with_capacity(nodes_cap); |
| 84 | + let mut edge_targets = Vec::with_capacity(edges_cap); |
| 85 | + |
22 | 86 | // Sort the edges by the source -- this is important.
|
23 | 87 | edge_pairs.sort();
|
24 | 88 |
|
25 |
| - let num_edges = edge_pairs.len(); |
| 89 | + // Fill forward references |
| 90 | + create_index( |
| 91 | + num_nodes, |
| 92 | + &mut edge_pairs.iter().map(|&(src, _)| src), |
| 93 | + &mut edge_pairs.iter().map(|&(_, tgt)| tgt), |
| 94 | + &mut edge_targets, |
| 95 | + &mut node_starts, |
| 96 | + ); |
26 | 97 |
|
27 |
| - // Store the *target* of each edge into `edge_targets`. |
28 |
| - let edge_targets: Vec<N> = edge_pairs.iter().map(|&(_, target)| target).collect(); |
29 |
| - |
30 |
| - // Create the *edge starts* array. We are iterating over the |
31 |
| - // (sorted) edge pairs. We maintain the invariant that the |
32 |
| - // length of the `node_starts` array is enough to store the |
33 |
| - // current source node -- so when we see that the source node |
34 |
| - // for an edge is greater than the current length, we grow the |
35 |
| - // edge-starts array by just enough. |
36 |
| - let mut node_starts = IndexVec::with_capacity(num_edges); |
37 |
| - for (index, &(source, _)) in edge_pairs.iter().enumerate() { |
38 |
| - // If we have a list like `[(0, x), (2, y)]`: |
39 |
| - // |
40 |
| - // - Start out with `node_starts` of `[]` |
41 |
| - // - Iterate to `(0, x)` at index 0: |
42 |
| - // - Push one entry because `node_starts.len()` (0) is <= the source (0) |
43 |
| - // - Leaving us with `node_starts` of `[0]` |
44 |
| - // - Iterate to `(2, y)` at index 1: |
45 |
| - // - Push one entry because `node_starts.len()` (1) is <= the source (2) |
46 |
| - // - Push one entry because `node_starts.len()` (2) is <= the source (2) |
47 |
| - // - Leaving us with `node_starts` of `[0, 1, 1]` |
48 |
| - // - Loop terminates |
49 |
| - while node_starts.len() <= source.index() { |
50 |
| - node_starts.push(index); |
51 |
| - } |
52 |
| - } |
| 98 | + // Fill back references |
| 99 | + if BR { |
| 100 | + // Pop the special "last" entry, it will be replaced by first back ref |
| 101 | + node_starts.pop(); |
53 | 102 |
|
54 |
| - // Pad out the `node_starts` array so that it has `num_nodes + |
55 |
| - // 1` entries. Continuing our example above, if `num_nodes` is |
56 |
| - // be `3`, we would push one more index: `[0, 1, 1, 2]`. |
57 |
| - // |
58 |
| - // Interpretation of that vector: |
59 |
| - // |
60 |
| - // [0, 1, 1, 2] |
61 |
| - // ---- range for N=2 |
62 |
| - // ---- range for N=1 |
63 |
| - // ---- range for N=0 |
64 |
| - while node_starts.len() <= num_nodes { |
65 |
| - node_starts.push(edge_targets.len()); |
66 |
| - } |
| 103 | + // Re-sort the edges so that they are sorted by target |
| 104 | + edge_pairs.sort_by_key(|&(src, tgt)| (tgt, src)); |
67 | 105 |
|
68 |
| - assert_eq!(node_starts.len(), num_nodes + 1); |
| 106 | + create_index( |
| 107 | + // Back essentially double the number of nodes |
| 108 | + num_nodes * 2, |
| 109 | + // NB: the source/target are switched here too |
| 110 | + // NB: we double the key index, so that we can later use *2 to get the back references |
| 111 | + &mut edge_pairs.iter().map(|&(_, tgt)| N::new(tgt.index() + num_nodes)), |
| 112 | + &mut edge_pairs.iter().map(|&(src, _)| src), |
| 113 | + &mut edge_targets, |
| 114 | + &mut node_starts, |
| 115 | + ); |
| 116 | + } |
69 | 117 |
|
70 | 118 | Self { node_starts, edge_targets }
|
71 | 119 | }
|
72 | 120 |
|
73 | 121 | /// Gets the successors for `source` as a slice.
|
74 | 122 | pub fn successors(&self, source: N) -> &[N] {
|
| 123 | + assert!(source.index() < self.num_nodes()); |
| 124 | + |
75 | 125 | let start_index = self.node_starts[source];
|
76 | 126 | let end_index = self.node_starts[source.plus(1)];
|
77 | 127 | &self.edge_targets[start_index..end_index]
|
78 | 128 | }
|
79 | 129 | }
|
80 | 130 |
|
81 |
| -impl<N: Idx> DirectedGraph for VecGraph<N> { |
| 131 | +impl<N: Idx + Ord> VecGraph<N, true> { |
| 132 | + /// Gets the predecessors for `target` as a slice. |
| 133 | + pub fn predecessors(&self, target: N) -> &[N] { |
| 134 | + assert!(target.index() < self.num_nodes()); |
| 135 | + |
| 136 | + let target = N::new(target.index() + self.num_nodes()); |
| 137 | + |
| 138 | + let start_index = self.node_starts[target]; |
| 139 | + let end_index = self.node_starts[target.plus(1)]; |
| 140 | + &self.edge_targets[start_index..end_index] |
| 141 | + } |
| 142 | +} |
| 143 | + |
| 144 | +/// Creates/initializes the index for the [`VecGraph`]. A helper for [`VecGraph::new`]. |
| 145 | +/// |
| 146 | +/// - `num_nodes` is the target number of nodes in the graph |
| 147 | +/// - `sorted_edge_sources` are the edge sources, sorted |
| 148 | +/// - `associated_edge_targets` are the edge *targets* in the same order as sources |
| 149 | +/// - `edge_targets` is the vec of targets to be extended |
| 150 | +/// - `node_starts` is the index to be filled |
| 151 | +fn create_index<N: Idx + Ord>( |
| 152 | + num_nodes: usize, |
| 153 | + sorted_edge_sources: &mut dyn Iterator<Item = N>, |
| 154 | + associated_edge_targets: &mut dyn Iterator<Item = N>, |
| 155 | + edge_targets: &mut Vec<N>, |
| 156 | + node_starts: &mut IndexVec<N, usize>, |
| 157 | +) { |
| 158 | + let offset = edge_targets.len(); |
| 159 | + |
| 160 | + // Store the *target* of each edge into `edge_targets`. |
| 161 | + edge_targets.extend(associated_edge_targets); |
| 162 | + |
| 163 | + // Create the *edge starts* array. We are iterating over the |
| 164 | + // (sorted) edge pairs. We maintain the invariant that the |
| 165 | + // length of the `node_starts` array is enough to store the |
| 166 | + // current source node -- so when we see that the source node |
| 167 | + // for an edge is greater than the current length, we grow the |
| 168 | + // edge-starts array by just enough. |
| 169 | + for (index, source) in sorted_edge_sources.enumerate() { |
| 170 | + // If we have a list like `[(0, x), (2, y)]`: |
| 171 | + // |
| 172 | + // - Start out with `node_starts` of `[]` |
| 173 | + // - Iterate to `(0, x)` at index 0: |
| 174 | + // - Push one entry because `node_starts.len()` (0) is <= the source (0) |
| 175 | + // - Leaving us with `node_starts` of `[0]` |
| 176 | + // - Iterate to `(2, y)` at index 1: |
| 177 | + // - Push one entry because `node_starts.len()` (1) is <= the source (2) |
| 178 | + // - Push one entry because `node_starts.len()` (2) is <= the source (2) |
| 179 | + // - Leaving us with `node_starts` of `[0, 1, 1]` |
| 180 | + // - Loop terminates |
| 181 | + while node_starts.len() <= source.index() { |
| 182 | + node_starts.push(index + offset); |
| 183 | + } |
| 184 | + } |
| 185 | + |
| 186 | + // Pad out the `node_starts` array so that it has `num_nodes + |
| 187 | + // 1` entries. Continuing our example above, if `num_nodes` is |
| 188 | + // be `3`, we would push one more index: `[0, 1, 1, 2]`. |
| 189 | + // |
| 190 | + // Interpretation of that vector: |
| 191 | + // |
| 192 | + // [0, 1, 1, 2] |
| 193 | + // ---- range for N=2 |
| 194 | + // ---- range for N=1 |
| 195 | + // ---- range for N=0 |
| 196 | + while node_starts.len() <= num_nodes { |
| 197 | + node_starts.push(edge_targets.len()); |
| 198 | + } |
| 199 | + |
| 200 | + assert_eq!(node_starts.len(), num_nodes + 1); |
| 201 | +} |
| 202 | + |
| 203 | +impl<N: Idx, const BR: bool> DirectedGraph for VecGraph<N, BR> { |
82 | 204 | type Node = N;
|
83 | 205 |
|
84 | 206 | fn num_nodes(&self) -> usize {
|
85 |
| - self.node_starts.len() - 1 |
| 207 | + match BR { |
| 208 | + false => self.node_starts.len() - 1, |
| 209 | + // If back refs are enabled, half of the array is said back refs |
| 210 | + true => (self.node_starts.len() - 1) / 2, |
| 211 | + } |
86 | 212 | }
|
87 | 213 | }
|
88 | 214 |
|
89 |
| -impl<N: Idx> NumEdges for VecGraph<N> { |
| 215 | +impl<N: Idx, const BR: bool> NumEdges for VecGraph<N, BR> { |
90 | 216 | fn num_edges(&self) -> usize {
|
91 |
| - self.edge_targets.len() |
| 217 | + match BR { |
| 218 | + false => self.edge_targets.len(), |
| 219 | + // If back refs are enabled, half of the array is reversed edges for them |
| 220 | + true => self.edge_targets.len() / 2, |
| 221 | + } |
92 | 222 | }
|
93 | 223 | }
|
94 | 224 |
|
95 |
| -impl<N: Idx + Ord> Successors for VecGraph<N> { |
| 225 | +impl<N: Idx + Ord, const BR: bool> Successors for VecGraph<N, BR> { |
96 | 226 | fn successors(&self, node: N) -> impl Iterator<Item = Self::Node> {
|
97 | 227 | self.successors(node).iter().cloned()
|
98 | 228 | }
|
99 | 229 | }
|
| 230 | + |
| 231 | +impl<N: Idx + Ord> Predecessors for VecGraph<N, true> { |
| 232 | + fn predecessors(&self, node: Self::Node) -> impl Iterator<Item = Self::Node> { |
| 233 | + self.predecessors(node).iter().cloned() |
| 234 | + } |
| 235 | +} |
0 commit comments