-
-
Notifications
You must be signed in to change notification settings - Fork 3.2k
Expand file tree
/
Copy pathgraph_utils.py
More file actions
161 lines (131 loc) · 4.87 KB
/
graph_utils.py
File metadata and controls
161 lines (131 loc) · 4.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
"""Helpers for manipulations with graphs."""
from __future__ import annotations
from collections.abc import Iterator, Set as AbstractSet
from typing import TypeVar
T = TypeVar("T")
def strongly_connected_components(
    vertices: AbstractSet[T], edges: dict[T, list[T]]
) -> Iterator[set[T]]:
    """Compute Strongly Connected Components of a directed graph.

    The traversal is a path-based depth-first search driven by an explicit
    work stack rather than recursion, so arbitrarily long dependency chains
    do not hit the interpreter recursion limit.

    Args:
      vertices: the labels for the vertices
      edges: for each vertex, gives the target vertices of its outgoing edges

    Returns:
      An iterator yielding strongly connected components, each
      represented as a set of vertices.  Each input vertex will occur
      exactly once; vertices not part of a SCC are returned as
      singleton sets.  A component is yielded only after every other
      component it has an edge into has been yielded.

    Raises:
      KeyError: if a visited vertex has no entry in `edges`.

    From https://code.activestate.com/recipes/578507/.
    """
    # Vertices already emitted as part of some component.
    identified: set[T] = set()
    # Vertices on the current DFS path plus finished-but-unassigned ones.
    stack: list[T] = []
    # Position of each visited vertex in `stack` at the time it was pushed.
    index: dict[T, int] = {}
    # Stack positions that still start a candidate component.
    boundaries: list[int] = []
    # Explicit DFS frames: (vertex, iterator over its remaining successors).
    work: list[tuple[T, Iterator[T]]] = []

    def enter(v: T) -> None:
        """Record the first visit of `v` and open a DFS frame for it."""
        index[v] = len(stack)
        stack.append(v)
        boundaries.append(index[v])
        work.append((v, iter(edges[v])))

    for root in vertices:
        if root in index:
            continue
        enter(root)
        while work:
            v, successors = work[-1]
            for w in successors:
                if w not in index:
                    # Descend; the parent's iterator resumes here afterwards.
                    enter(w)
                    break
                if w not in identified:
                    # Back/cross edge into the open path: merge candidates.
                    while index[w] < boundaries[-1]:
                        boundaries.pop()
            else:
                # All successors of `v` handled: leave its frame.
                work.pop()
                if boundaries[-1] == index[v]:
                    # `v` is the root of a component; cut it off the stack.
                    boundaries.pop()
                    scc = set(stack[index[v] :])
                    del stack[index[v] :]
                    identified.update(scc)
                    yield scc
def prepare_sccs(
    sccs: list[set[T]], edges: dict[T, list[T]]
) -> dict[AbstractSet[T], set[AbstractSet[T]]]:
    """Use original edges to organize SCCs in a graph by dependencies between them.

    Args:
      sccs: the strongly connected components, each a set of vertices
      edges: for each vertex, gives the target vertices of its outgoing edges

    Returns:
      A map from each SCC (as a frozenset) to the set of SCCs (also
      frozensets) that its vertices have edges into.  Edges between two
      vertices of the same SCC make that SCC appear in its own dependency
      set.

    Raises:
      KeyError: if a vertex of an SCC has no entry in `edges`, or an edge
        targets a vertex that is not a member of any SCC in `sccs`.
    """
    # Freeze every component exactly once; the same object serves as the
    # per-vertex lookup value and as the key of the result.
    frozen_sccs = [frozenset(scc) for scc in sccs]

    # Map each vertex to the component that contains it.
    sccsmap: dict[T, AbstractSet[T]] = {}
    for scc_frozen in frozen_sccs:
        for v in scc_frozen:
            sccsmap[v] = scc_frozen

    data: dict[AbstractSet[T], set[AbstractSet[T]]] = {}
    for scc_frozen in frozen_sccs:
        deps: set[AbstractSet[T]] = set()
        for v in scc_frozen:
            deps.update(sccsmap[x] for x in edges[v])
        data[scc_frozen] = deps
    return data
class topsort(Iterator[set[T]]):  # noqa: N801
    """Topological sort using Kahn's algorithm.

    Uses in-degree counters and a reverse adjacency list, so the total work
    is O(V + E).

    Implemented as a class rather than a generator for better mypyc
    compilation.

    Args:
      data: A map from vertices to all vertices that it has an edge
            connecting it to.  NOTE: dependency sets in this data
            structure are modified in place to remove self-dependencies.
            Orphans are handled internally and are not added to `data`.

    Returns:
      An iterator yielding sets of vertices that have an equivalent
      ordering.

    Raises:
      AssertionError: from `next()` once no vertex is ready but some are
        still unprocessed, i.e. the remaining vertices depend on a cycle.
        Raised explicitly so the check is not stripped under `python -O`.

    Example:
      Suppose the input has the following structure:

        {A: {B, C}, B: {D}, C: {D}}

      The algorithm treats orphan dependencies as if normalized to:

        {A: {B, C}, B: {D}, C: {D}, D: {}}

      It will yield the following values:

        {D}
        {B, C}
        {A}
    """

    def __init__(self, data: dict[T, set[T]]) -> None:
        # Single pass: remove self-deps, build reverse adjacency list,
        # compute in-degree counts, detect orphans, and find initial ready set.
        in_degree: dict[T, int] = {}
        rev: dict[T, list[T]] = {}
        ready: set[T] = set()
        for item, deps in data.items():
            deps.discard(item)  # Ignore self dependencies.
            deg = len(deps)
            in_degree[item] = deg
            if deg == 0:
                ready.add(item)
            if item not in rev:
                rev[item] = []
            for dep in deps:
                if dep in rev:
                    rev[dep].append(item)
                else:
                    rev[dep] = [item]
                    if dep not in data:
                        # Orphan: appears as dependency but has no entry in data.
                        in_degree[dep] = 0
                        ready.add(dep)

        # Number of unresolved dependencies per vertex (orphans included).
        self.in_degree = in_degree
        # For each vertex, the vertices that depend on it.
        self.rev = rev
        # Vertices whose dependencies are all satisfied; the next batch.
        self.ready = ready
        # Vertices that have not yet been placed into any ready batch.
        self.remaining = len(in_degree) - len(ready)

    def __iter__(self) -> Iterator[set[T]]:
        return self

    def __next__(self) -> set[T]:
        ready = self.ready
        if not ready:
            if self.remaining != 0:
                # An explicit raise (not `assert`) keeps cycle detection
                # active when assertions are disabled with -O.
                blocked = [k for k, deg in self.in_degree.items() if deg > 0]
                raise AssertionError(f"A cyclic dependency exists amongst {blocked!r}")
            raise StopIteration

        in_degree = self.in_degree
        rev = self.rev
        new_ready: set[T] = set()
        # Releasing the current batch may unblock its dependents.
        for item in ready:
            for dependent in rev[item]:
                new_deg = in_degree[dependent] - 1
                in_degree[dependent] = new_deg
                if new_deg == 0:
                    new_ready.add(dependent)
        self.remaining -= len(new_ready)
        self.ready = new_ready
        return ready