Codestin Search App

598 lines (516 loc) · 24 KB
"""Type inference constraint solving"""
from __future__ import annotations
from collections import defaultdict
from collections.abc import Iterable, Sequence
from typing import TypeAlias as _TypeAlias
from mypy.constraints import SUBTYPE_OF, SUPERTYPE_OF, Constraint, infer_constraints, neg_op
from mypy.expandtype import expand_type
from mypy.graph_utils import prepare_sccs, strongly_connected_components, topsort
from mypy.join import join_type_list
from mypy.meet import meet_type_list, meet_types
from mypy.subtypes import is_subtype
from mypy.typeops import get_all_type_vars
from mypy.types import (
    AnyType,
    Instance,
    NoneType,
    ParamSpecType,
    ProperType,
    TupleType,
    TypeOfAny,
    TypeVarId,
    TypeVarLikeType,
    TypeVarTupleType,
    TypeVarType,
    UninhabitedType,
    UnionType,
    UnpackType,
    get_proper_type,
from mypy.typestate import type_state
Bounds: _TypeAlias = "dict[TypeVarId, set[Type]]"
Graph: _TypeAlias = "set[tuple[TypeVarId, TypeVarId]]"
Solutions: _TypeAlias = "dict[TypeVarId, Type | None]"
def solve_constraints(
    original_vars: Sequence[TypeVarLikeType],
    constraints: list[Constraint],
    strict: bool = True,
    allow_polymorphic: bool = False,
    skip_unsatisfied: bool = False,
) -> tuple[list[Type | None], list[TypeVarLikeType]]:
    """Solve type constraints.
    Return the best type(s) for type variables; each type can be None if the value of
    the variable could not be solved.
    If a variable has no constraints, if strict=True then arbitrarily
    pick UninhabitedType as the value of the type variable. If strict=False, pick AnyType.
    If allow_polymorphic=True, then use the full algorithm that can potentially return
    free type variables in solutions (these require special care when applying). Otherwise,
    use a simplified algorithm that just solves each type variable individually if possible.
    The skip_unsatisfied flag matches the same one in applytype.apply_generic_arguments().
    vars = [tv.id for tv in original_vars]
    if not vars:
        return [], []
    originals = {tv.id: tv for tv in original_vars}
    extra_vars: list[TypeVarId] = []
    # Get additional type variables from generic actuals.
    for c in constraints:
        extra_vars.extend([v.id for v in c.extra_tvars if v.id not in vars + extra_vars])
        originals.update({v.id: v for v in c.extra_tvars if v.id not in originals})
    if allow_polymorphic:
        # Constraints inferred from unions require special handling in polymorphic inference.
        constraints = skip_reverse_union_constraints(constraints)
    # Collect a list of constraints for each type variable.
    cmap: dict[TypeVarId, list[Constraint]] = {tv: [] for tv in vars + extra_vars}
    for con in constraints:
        if con.type_var in vars + extra_vars:
            cmap[con.type_var].append(con)
    if allow_polymorphic:
        if constraints:
            solutions, free_vars = solve_with_dependent(
                vars + extra_vars, constraints, vars, originals
        else:
            solutions = {}
            free_vars = []
        solutions = {}
        free_vars = []
        for tv, cs in cmap.items():
            if not cs:
                continue
            lowers = [c.target for c in cs if c.op == SUPERTYPE_OF]
            uppers = [c.target for c in cs if c.op == SUBTYPE_OF]
            solution = solve_one(lowers, uppers)
            # Do not leak type variables in non-polymorphic solutions.
            if solution is None or not get_vars(
                solution, [tv for tv in extra_vars if tv not in vars]
                solutions[tv] = solution
    res: list[Type | None] = []
    for v in vars:
        if v in solutions:
            res.append(solutions[v])
        else:
            # No constraints for type variable -- 'UninhabitedType' is the most specific type.
            candidate: Type
            if strict:
                candidate = UninhabitedType()
                candidate.ambiguous = True
            else:
                candidate = AnyType(TypeOfAny.special_form)
            res.append(candidate)
    if not free_vars and not skip_unsatisfied:
        # Most of the validation for solutions is done in applytype.py, but here we can
        # quickly test solutions w.r.t. to upper bounds, and use the latter (if possible),
        # if solutions are actually not valid (due to poor inference context).
        res = pre_validate_solutions(res, original_vars, constraints)
    return res, free_vars
def solve_with_dependent(
    vars: list[TypeVarId],
    constraints: list[Constraint],
    original_vars: list[TypeVarId],
    originals: dict[TypeVarId, TypeVarLikeType],
) -> tuple[Solutions, list[TypeVarLikeType]]:
    """Solve set of constraints that may depend on each other, like T <: List[S].
    The whole algorithm consists of five steps:
      * Propagate via linear constraints and use secondary constraints to get transitive closure
      * Find dependencies between type variables, group them in SCCs, and sort topologically
      * Check that all SCC are intrinsically linear, we can't solve (express) T <: List[T]
      * Variables in leaf SCCs that don't have constant bounds are free (choose one per SCC)
      * Solve constraints iteratively starting from leaves, updating bounds after each step.
    graph, lowers, uppers = transitive_closure(vars, constraints)
    dmap = compute_dependencies(vars, graph, lowers, uppers)
    sccs = list(strongly_connected_components(set(vars), dmap))
    if not all(check_linear(scc, lowers, uppers) for scc in sccs):
        return {}, []
    raw_batches = list(topsort(prepare_sccs(sccs, dmap)))
    free_vars = []
    free_solutions = {}
    for scc in raw_batches[0]:
        # If there are no bounds on this SCC, then the only meaningful solution we can
        # express, is that each variable is equal to a new free variable. For example,
        # if we have T <: S, S <: U, we deduce: T = S = U = <free>.
        if all(not lowers[tv] and not uppers[tv] for tv in scc):
            best_free = choose_free([originals[tv] for tv in scc], original_vars)
            if best_free:
                # TODO: failing to choose may cause leaking type variables,
                # we need to fail gracefully instead.
                free_vars.append(best_free.id)
                free_solutions[best_free.id] = best_free
    # Update lowers/uppers with free vars, so these can now be used
    # as valid solutions.
    for l, u in graph:
        if l in free_vars:
            lowers[u].add(free_solutions[l])
        if u in free_vars:
            uppers[l].add(free_solutions[u])
    # Flatten the SCCs that are independent, we can solve them together,
    # since we don't need to update any targets in between.
    batches = []
    for batch in raw_batches:
        next_bc = []
        for scc in batch:
            next_bc.extend(list(scc))
        batches.append(next_bc)
    solutions: dict[TypeVarId, Type | None] = {}
    for flat_batch in batches:
        res = solve_iteratively(flat_batch, graph, lowers, uppers)
        solutions.update(res)
    return solutions, [free_solutions[tv] for tv in free_vars]
def solve_iteratively(
    batch: list[TypeVarId], graph: Graph, lowers: Bounds, uppers: Bounds
) -> Solutions:
    """Solve transitive closure sequentially, updating upper/lower bounds after each step.
    Transitive closure is represented as a linear graph plus lower/upper bounds for each
    type variable, see transitive_closure() docstring for details.
    We solve for type variables that appear in `batch`. If a bound is not constant (i.e. it
    looks like T :> F[S, ...]), we substitute solutions found so far in the target F[S, ...]
    after solving the batch.
    Importantly, after solving each variable in a batch, we move it from linear graph to
    upper/lower bounds, this way we can guarantee consistency of solutions (see comment below
    for an example when this is important).
    solutions = {}
    s_batch = set(batch)
    while s_batch:
        for tv in sorted(s_batch, key=lambda x: x.raw_id):
            if lowers[tv] or uppers[tv]:
                solvable_tv = tv
                break
        else:
            break
        # Solve each solvable type variable separately.
        s_batch.remove(solvable_tv)
        result = solve_one(lowers[solvable_tv], uppers[solvable_tv])
        solutions[solvable_tv] = result
        if result is None:
            # TODO: support backtracking lower/upper bound choices and order within SCCs.
            # (will require switching this function from iterative to recursive).
            continue
        # Update the (transitive) bounds from graph if there is a solution.
        # This is needed to guarantee solutions will never contradict the initial
        # constraints. For example, consider {T <: S, T <: A, S :> B} with A :> B.
        # If we would not update the uppers/lowers from graph, we would infer T = A, S = B
        # which is not correct.
        for l, u in graph.copy():
            if l == u:
                continue
            if l == solvable_tv:
                lowers[u].add(result)
                graph.remove((l, u))
            if u == solvable_tv:
                uppers[l].add(result)
                graph.remove((l, u))
    # We can update uppers/lowers only once after solving the whole SCC,
    # since uppers/lowers can't depend on type variables in the SCC
    # (and we would reject such SCC as non-linear and therefore not solvable).
    subs = {tv: s for (tv, s) in solutions.items() if s is not None}
    for tv in lowers:
        lowers[tv] = {expand_type(lt, subs) for lt in lowers[tv]}
    for tv in uppers:
        uppers[tv] = {expand_type(ut, subs) for ut in uppers[tv]}
    return solutions
def _join_sorted_key(t: Type) -> int:
    t = get_proper_type(t)
    if isinstance(t, UnionType):
        return -2
    if isinstance(t, NoneType):
        return -1
    return 0
def solve_one(lowers: Iterable[Type], uppers: Iterable[Type]) -> Type | None:
    """Solve constraints by finding by using meets of upper bounds, and joins of lower bounds."""
    candidate: Type | None = None
    # Filter out previous results of failed inference, they will only spoil the current pass...
    new_uppers = []
    for u in uppers:
        pu = get_proper_type(u)
        if not isinstance(pu, UninhabitedType) or not pu.ambiguous:
            new_uppers.append(u)
    uppers = new_uppers
    # ...unless this is the only information we have, then we just pass it on.
    lowers = list(lowers)
    if not uppers and not lowers:
        candidate = UninhabitedType()
        candidate.ambiguous = True
        return candidate
    bottom: Type | None = None
    top: Type | None = None
    # Process each bound separately, and calculate the lower and upper
    # bounds based on constraints. Note that we assume that the constraint
    # targets do not have constraint references.
    if type_state.infer_unions and lowers:
        # This deviates from the general mypy semantics because
        # recursive types are union-heavy in 95% of cases.
        # Retain `None` when no bottoms were provided to avoid bogus `Never` inference.
        bottom = UnionType.make_union(lowers)
        # The order of lowers is non-deterministic.
        # We attempt to sort lowers because joins are non-associative. For instance:
        # join(join(int, str), int | str) == join(object, int | str) == object
        # join(int, join(str, int | str)) == join(int, int | str)    == int | str
        # Note that joins in theory should be commutative, but in practice some bugs mean this is
        # also a source of non-deterministic type checking results.
        sorted_lowers = sorted(lowers, key=_join_sorted_key)
        if sorted_lowers:
            bottom = join_type_list(sorted_lowers)
    for target in uppers:
        if top is None:
            top = target
        else:
            top = meet_types(top, target)
    p_top = get_proper_type(top)
    p_bottom = get_proper_type(bottom)
    if isinstance(p_top, AnyType) or isinstance(p_bottom, AnyType):
        source_any = top if isinstance(p_top, AnyType) else bottom
        assert isinstance(source_any, ProperType) and isinstance(source_any, AnyType)
        return AnyType(TypeOfAny.from_another_any, source_any=source_any)
    elif bottom is None:
        if top:
            candidate = top
        else:
            # No constraints for type variable
            return None
    elif top is None:
        candidate = bottom
    elif is_subtype(bottom, top):
        candidate = bottom
        candidate = None
    return candidate
def choose_free(
    scc: list[TypeVarLikeType], original_vars: list[TypeVarId]
) -> TypeVarLikeType | None:
    """Choose the best solution for an SCC containing only type variables.
    This is needed to preserve e.g. the upper bound in a situation like this:
        def dec(f: Callable[[T], S]) -> Callable[[T], S]: ...
        @dec
        def test(x: U) -> U: ...
    where U <: A.
    if len(scc) == 1:
        # Fast path, choice is trivial.
        return scc[0]
    common_upper_bound = meet_type_list([t.upper_bound for t in scc])
    common_upper_bound_p = get_proper_type(common_upper_bound)
    # We include None for when strict-optional is disabled.
    if isinstance(common_upper_bound_p, (UninhabitedType, NoneType)):
        # This will cause to infer Never, which is better than a free TypeVar
        # that has an upper bound Never.
        return None
    values: list[Type] = []
    for tv in scc:
        if isinstance(tv, TypeVarType) and tv.values:
            if values:
                # It is too tricky to support multiple TypeVars with values
                # within the same SCC.
                return None
            values = tv.values.copy()
    if values and not is_trivial_bound(common_upper_bound_p):
        # If there are both values and upper bound present, we give up,
        # since type variables having both are not supported.
        return None
    # For convenience with current type application machinery, we use a stable
    # choice that prefers the original type variables (not polymorphic ones) in SCC.
    best = min(scc, key=lambda x: (x.id not in original_vars, x.id.raw_id))
    if isinstance(best, TypeVarType):
        return best.copy_modified(values=values, upper_bound=common_upper_bound)
    if is_trivial_bound(common_upper_bound_p, allow_tuple=True):
        # TODO: support more cases for ParamSpecs/TypeVarTuples
        return best
    return None
def is_trivial_bound(tp: ProperType, allow_tuple: bool = False) -> bool:
    if isinstance(tp, Instance) and tp.type.fullname == "builtins.tuple":
        return allow_tuple and is_trivial_bound(get_proper_type(tp.args[0]))
    return isinstance(tp, Instance) and tp.type.fullname == "builtins.object"
def find_linear(c: Constraint) -> tuple[bool, TypeVarId | None]:
    """Find out if this constraint represent a linear relationship, return target id if yes."""
    if isinstance(c.origin_type_var, TypeVarType):
        if isinstance(c.target, TypeVarType):
            return True, c.target.id
    if isinstance(c.origin_type_var, ParamSpecType):
        if isinstance(c.target, ParamSpecType) and not c.target.prefix.arg_types:
            return True, c.target.id
    if isinstance(c.origin_type_var, TypeVarTupleType):
        target = get_proper_type(c.target)
        if isinstance(target, TupleType) and len(target.items) == 1:
            item = target.items[0]
            if isinstance(item, UnpackType) and isinstance(item.type, TypeVarTupleType):
                return True, item.type.id
    return False, None
def transitive_closure(
    tvars: list[TypeVarId], constraints: list[Constraint]
) -> tuple[Graph, Bounds, Bounds]:
    """Find transitive closure for given constraints on type variables.
    Transitive closure gives maximal set of lower/upper bounds for each type variable,
    such that we cannot deduce any further bounds by chaining other existing bounds.
    The transitive closure is represented by:
      * A set of lower and upper bounds for each type variable, where only constant and
        non-linear terms are included in the bounds.
      * A graph of linear constraints between type variables (represented as a set of pairs)
    Such separation simplifies reasoning, and allows an efficient and simple incremental
    transitive closure algorithm that we use here.
    For example if we have initial constraints [T <: S, S <: U, U <: int], the transitive
    closure is given by:
      * {} <: T <: {int}
      * {} <: S <: {int}
      * {} <: U <: {int}
      * {T <: S, S <: U, T <: U}
    uppers: Bounds = defaultdict(set)
    lowers: Bounds = defaultdict(set)
    graph: Graph = {(tv, tv) for tv in tvars}
    remaining = set(constraints)
    while remaining:
        c = remaining.pop()
        # Note that ParamSpec constraint P <: Q may be considered linear only if Q has no prefix,
        # for cases like P <: Concatenate[T, Q] we should consider this non-linear and put {P} and
        # {T, Q} into separate SCCs. Similarly, Ts <: Tuple[*Us] considered linear, while
        # Ts <: Tuple[*Us, U] is non-linear.
        is_linear, target_id = find_linear(c)
        if is_linear and target_id in tvars:
            assert target_id is not None
            if c.op == SUBTYPE_OF:
                lower, upper = c.type_var, target_id
            else:
                lower, upper = target_id, c.type_var
            if (lower, upper) in graph:
                continue
            graph |= {
                (l, u) for l in tvars for u in tvars if (l, lower) in graph and (upper, u) in graph
            for u in tvars:
                if (upper, u) in graph:
                    lowers[u] |= lowers[lower]
            for l in tvars:
                if (l, lower) in graph:
                    uppers[l] |= uppers[upper]
            for lt in lowers[lower]:
                for ut in uppers[upper]:
                    add_secondary_constraints(remaining, lt, ut)
        elif c.op == SUBTYPE_OF:
            if c.target in uppers[c.type_var]:
                continue
            for l in tvars:
                if (l, c.type_var) in graph:
                    uppers[l].add(c.target)
            for lt in lowers[c.type_var]:
                add_secondary_constraints(remaining, lt, c.target)
        else:
            assert c.op == SUPERTYPE_OF
            if c.target in lowers[c.type_var]:
                continue
            for u in tvars:
                if (c.type_var, u) in graph:
                    lowers[u].add(c.target)
            for ut in uppers[c.type_var]:
                add_secondary_constraints(remaining, c.target, ut)
    return graph, lowers, uppers
def add_secondary_constraints(cs: set[Constraint], lower: Type, upper: Type) -> None:
    """Add secondary constraints inferred between lower and upper (in place)."""
    if isinstance(get_proper_type(upper), UnionType) and isinstance(
        get_proper_type(lower), UnionType
        # When both types are unions, this can lead to inferring spurious constraints,
        # for example Union[T, int] <: S <: Union[T, int] may infer T <: int.
        # To avoid this, just skip them for now.
        return
    # TODO: what if secondary constraints result in inference against polymorphic actual?
    cs.update(set(infer_constraints(lower, upper, SUBTYPE_OF)))
    cs.update(set(infer_constraints(upper, lower, SUPERTYPE_OF)))
def compute_dependencies(
    tvars: list[TypeVarId], graph: Graph, lowers: Bounds, uppers: Bounds
) -> dict[TypeVarId, list[TypeVarId]]:
    """Compute dependencies between type variables induced by constraints.
    If we have a constraint like T <: List[S], we say that T depends on S, since
    we will need to solve for S first before we can solve for T.
    res = {}
    for tv in tvars:
        deps = set()
        for lt in lowers[tv]:
            deps |= get_vars(lt, tvars)
        for ut in uppers[tv]:
            deps |= get_vars(ut, tvars)
        for other in tvars:
            if other == tv:
                continue
            if (tv, other) in graph or (other, tv) in graph:
                deps.add(other)
        res[tv] = list(deps)
    return res
def check_linear(scc: set[TypeVarId], lowers: Bounds, uppers: Bounds) -> bool:
    """Check there are only linear constraints between type variables in SCC.
    Linear are constraints like T <: S (while T <: F[S] are non-linear).
    for tv in scc:
        if any(get_vars(lt, list(scc)) for lt in lowers[tv]):
            return False
        if any(get_vars(ut, list(scc)) for ut in uppers[tv]):
            return False
    return True
def skip_reverse_union_constraints(cs: list[Constraint]) -> list[Constraint]:
    """Avoid ambiguities for constraints inferred from unions during polymorphic inference.
    Polymorphic inference implicitly relies on assumption that a reverse of a linear constraint
    is a linear constraint. This is however not true in presence of union types, for example
    T :> Union[S, int] vs S <: T. Trying to solve such constraints would be detected ambiguous
    as (T, S) form a non-linear SCC. However, simply removing the linear part results in a valid
    solution T = Union[S, int], S = <free>. A similar scenario is when we get T <: Union[T, int],
    such constraints carry no information, and will equally confuse linearity check.
    TODO: a cleaner solution may be to avoid inferring such constraints in first place, but
    this would require passing around a flag through all infer_constraints() calls.
    reverse_union_cs = set()
    for c in cs:
        p_target = get_proper_type(c.target)
        if isinstance(p_target, UnionType):
            for item in p_target.items:
                if isinstance(item, TypeVarType):
                    if item == c.origin_type_var and c.op == SUBTYPE_OF:
                        reverse_union_cs.add(c)
                        continue
                    # These two forms are semantically identical, but are different from
                    # the point of view of Constraint.__eq__().
                    reverse_union_cs.add(Constraint(item, neg_op(c.op), c.origin_type_var))
                    reverse_union_cs.add(Constraint(c.origin_type_var, c.op, item))
    return [c for c in cs if c not in reverse_union_cs]
def get_vars(target: Type, vars: list[TypeVarId]) -> set[TypeVarId]:
    """Find type variables for which we are solving in a target type."""
    return {tv.id for tv in get_all_type_vars(target)} & set(vars)
def pre_validate_solutions(
    solutions: list[Type | None],
    original_vars: Sequence[TypeVarLikeType],
    constraints: list[Constraint],
) -> list[Type | None]:
    """Check is each solution satisfies the upper bound of the corresponding type variable.
    If it doesn't satisfy the bound, check if bound itself satisfies all constraints, and
    if yes, use it instead as a fallback solution.
    new_solutions: list[Type | None] = []
    for t, s in zip(original_vars, solutions):
        if is_callable_protocol(t.upper_bound):
            # This is really ad-hoc, but a proper fix would be much more complex,
            # and otherwise this may cause crash in a relatively common scenario.
            new_solutions.append(s)
            continue
        if s is not None and not is_subtype(s, t.upper_bound):
            bound_satisfies_all = True
            for c in constraints:
                if c.op == SUBTYPE_OF and not is_subtype(t.upper_bound, c.target):
                    bound_satisfies_all = False
                if c.op == SUPERTYPE_OF and not is_subtype(c.target, t.upper_bound):
                    bound_satisfies_all = False
            if bound_satisfies_all:
                new_solutions.append(t.upper_bound)
                continue
        new_solutions.append(s)
    return new_solutions
def is_callable_protocol(t: Type) -> bool:
    proper_t = get_proper_type(t)
    if isinstance(proper_t, Instance) and proper_t.type.is_protocol:
        return "__call__" in proper_t.type.protocol_members
    return False
Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Uh oh!

FilesExpand file tree

solve.py

Latest commit

History

solve.py

File metadata and controls