gtn/.venv/Lib/site-packages/mypyc/analysis/attrdefined.py

437 lines
15 KiB
Python
Raw Normal View History

"""Always defined attribute analysis.
An always defined attribute has some statements in __init__ or the
class body that cause the attribute to be always initialized when an
instance is constructed. It must also not be possible to read the
attribute before initialization, and it can't be deletable.
We can assume that the value is always defined when reading an always
defined attribute. Otherwise we'll need to raise AttributeError if the
value is undefined (i.e. has the error value).
We use data flow analysis to figure out attributes that are always
defined. Example:
    class C:
        def __init__(self) -> None:
            self.x = 0
            if func():
                self.y = 1
            else:
                self.y = 2
                self.z = 3
In this example, the attributes 'x' and 'y' are always defined, but 'z'
is not. The analysis assumes that we know that there won't be any subclasses.
The analysis also works if there is a known, closed set of subclasses.
An attribute defined in a base class can only be always defined if it's
also always defined in all subclasses.
As soon as __init__ contains an op that can 'leak' self to another
function, we will stop inferring always defined attributes, since the
analysis is mostly intra-procedural and only looks at __init__ methods.
The called code could read an uninitialized attribute. Example:
    class C:
        def __init__(self) -> None:
            self.x = self.foo()

        def foo(self) -> int:
            ...
Now we won't infer 'x' as always defined, since 'foo' might read 'x'
before initialization.
As an exception to the above limitation, we perform inter-procedural
analysis of super().__init__ calls, since these are very common.
Our analysis is somewhat optimistic. We assume that nobody calls a
method of a partially uninitialized object through gc.get_objects(), in
particular. Code like this could potentially cause a segfault with a null
pointer dereference. This seems very unlikely to be an issue in practice,
however.
Accessing an attribute via getattr always checks for undefined attributes
and thus works if the object is partially uninitialized. This can be used
as a workaround if somebody ever needs to inspect partially uninitialized
objects via gc.get_objects().
The analysis runs after IR building as a separate pass. Since we only
run this on __init__ methods, this analysis pass will be fairly quick.
"""
from __future__ import annotations
from typing import Final, Set, Tuple
from mypyc.analysis.dataflow import (
CFG,
MAYBE_ANALYSIS,
AnalysisResult,
BaseAnalysisVisitor,
get_cfg,
run_analysis,
)
from mypyc.analysis.selfleaks import analyze_self_leaks
from mypyc.ir.class_ir import ClassIR
from mypyc.ir.ops import (
Assign,
AssignMulti,
BasicBlock,
Branch,
Call,
ControlOp,
GetAttr,
Register,
RegisterOp,
Return,
SetAttr,
SetMem,
Unreachable,
)
from mypyc.ir.rtypes import RInstance
# If True, print out all always-defined attributes of native classes (to aid
# debugging and testing). When enabled, the per-class analysis prints the
# class name followed by the sorted list of its always-defined attributes.
dump_always_defined: Final = False
def analyze_always_defined_attrs(class_irs: list[ClassIR]) -> None:
    """Find always defined attributes in all classes of a compilation unit.

    This is the main entry point of the analysis. It also tags attribute
    initialization ops to not decref the previous value (as this would
    read a NULL pointer and segfault), and updates the
    _always_initialized_attrs, _sometimes_initialized_attrs and
    init_self_leak attributes of the given ClassIR instances.

    The work is done in three passes over 'class_irs'. Each pass uses its
    own, initially empty, set of already-visited classes.
    """
    # Pass 1: analyze each target class together with the classes in its MRO.
    analyzed: set[ClassIR] = set()
    for cl in class_irs:
        analyze_always_defined_attrs_in_class(cl, analyzed)

    # Pass 2: refine the results by taking derived classes into account.
    refined: set[ClassIR] = set()
    for cl in class_irs:
        update_always_defined_attrs_using_subclasses(cl, refined)

    # Pass 3: detect attributes that need a bitmap to track definedness.
    bitmap_done: set[ClassIR] = set()
    for cl in class_irs:
        detect_undefined_bitmap(cl, bitmap_done)
def analyze_always_defined_attrs_in_class(cl: ClassIR, seen: set[ClassIR]) -> None:
    """Compute always/sometimes initialized attributes for a single class.

    Classes already in 'seen' are skipped; otherwise 'cl' is added to
    'seen' before anything else happens. Base classes (the rest of the
    MRO) are analyzed first. On success this sets
    cl._always_initialized_attrs and cl._sometimes_initialized_attrs, and,
    when the class has an __init__ method, also cl.init_self_leak. It
    additionally marks SetAttr ops in __init__ that act as initializers.
    """
    if cl in seen:
        return
    seen.add(cl)

    cannot_enforce = (
        cl.is_trait
        or cl.inherits_python
        or cl.allow_interpreted_subclasses
        or cl.builtin_base is not None
        or cl.children is None
        or cl.is_serializable()
    )
    if cannot_enforce:
        # Give up -- we can't enforce that attributes are always defined.
        return

    # Bases go first. The shared 'seen' set prevents duplicate work.
    for base in cl.mro[1:]:
        analyze_always_defined_attrs_in_class(base, seen)

    init = cl.get_method("__init__")
    if init is None:
        # Without __init__, only attributes with defaults are initialized.
        cl._always_initialized_attrs = cl.attrs_with_defaults.copy()
        cl._sometimes_initialized_attrs = cl.attrs_with_defaults.copy()
        return

    blocks = init.blocks
    self_reg = init.arg_regs[0]
    cfg = get_cfg(blocks)
    dirty = analyze_self_leaks(blocks, self_reg, cfg)
    maybe_defined = analyze_maybe_defined_attrs_in_init(
        blocks, self_reg, cl.attrs_with_defaults, cfg
    )

    # Every attribute declared anywhere in the MRO is a candidate.
    all_attrs: set[str] = {name for base in cl.mro for name in base.attributes}
    maybe_undefined = analyze_maybe_undefined_attrs_in_init(
        blocks, self_reg, initial_undefined=all_attrs - cl.attrs_with_defaults, cfg=cfg
    )

    candidates = find_always_defined_attributes(
        blocks, self_reg, all_attrs, maybe_defined, maybe_undefined, dirty
    )
    # Deletable attributes can never be treated as always defined.
    always_defined = {name for name in candidates if not cl.is_deletable(name)}
    cl._always_initialized_attrs = always_defined
    if dump_always_defined:
        print(cl.name, sorted(always_defined))

    cl._sometimes_initialized_attrs = find_sometimes_defined_attributes(
        blocks, self_reg, maybe_defined, dirty
    )
    mark_attr_initialiation_ops(blocks, self_reg, maybe_defined, dirty)

    # Check if __init__ can run unpredictable code (leak 'self'): some op
    # other than a Return is followed by a dirty state.
    cl.init_self_leak = any(
        dirty.after[block, idx] and not isinstance(op, Return)
        for block in blocks
        for idx, op in enumerate(block.ops)
    )
def find_always_defined_attributes(
    blocks: list[BasicBlock],
    self_reg: Register,
    all_attrs: set[str],
    maybe_defined: AnalysisResult[str],
    maybe_undefined: AnalysisResult[str],
    dirty: AnalysisResult[None],
) -> set[str]:
    """Find attributes that are always initialized in some basic blocks.

    The analysis results are expected to be up-to-date for the blocks.
    Starts from a copy of 'all_attrs' (the argument is not modified) and
    removes attributes that cannot be shown to be always defined.

    Return a set of always defined attributes.
    """
    result = set(all_attrs)
    for block in blocks:
        for idx, op in enumerate(block.ops):
            here = (block, idx)

            # Reading an attribute that may still be undefined rules it out.
            if isinstance(op, GetAttr) and op.obj is self_reg:
                if op.attr in maybe_undefined.before[here]:
                    result.discard(op.attr)

            # Writing an attribute that is sometimes undefined and sometimes
            # defined also rules it out. A write to a definitely undefined
            # attribute is fine: it is then treated as the initialization.
            if isinstance(op, SetAttr) and op.obj is self_reg:
                possibly_undefined = op.attr in maybe_undefined.before[here]
                if possibly_undefined and op.attr in maybe_defined.before[here]:
                    result.discard(op.attr)

            # An op that might run arbitrary code is an "exit" in terms of
            # the analysis -- nothing after it can be inferred reliably, so
            # the rest of this block is skipped.
            if dirty.after[here]:
                if not dirty.before[here]:
                    result &= maybe_defined.after[here] - maybe_undefined.after[here]
                break

            if isinstance(op, ControlOp):
                # Gotos/branches can also be "exits". The state after this op
                # is known to be clean here (otherwise we'd have left the loop
                # above), so only the target's entry state needs checking.
                for target in op.targets():
                    if dirty.before[target, 0]:
                        result &= maybe_defined.after[target, 0] - maybe_undefined.after[target, 0]
    return result
def find_sometimes_defined_attributes(
    blocks: list[BasicBlock],
    self_reg: Register,
    maybe_defined: AnalysisResult[str],
    dirty: AnalysisResult[None],
) -> set[str]:
    """Find attributes that are sometimes initialized in some basic blocks.

    Collects the possibly defined attributes at every "exit": an op after
    which the state first becomes dirty, or a control op whose target is
    dirty on entry. 'self_reg' is accepted but not consulted here.
    """
    found: set[str] = set()
    for block in blocks:
        for idx, op in enumerate(block.ops):
            here = (block, idx)
            # Only look at possibly defined attributes at exits.
            if dirty.after[here]:
                if not dirty.before[here]:
                    found |= maybe_defined.after[here]
                # Nothing past a dirty op in this block is considered.
                break
            if isinstance(op, ControlOp):
                # The state after this op is clean at this point, so only
                # the entry state of each target decides if it is an exit.
                for target in op.targets():
                    if dirty.before[target, 0]:
                        found |= maybe_defined.after[target, 0]
    return found
def mark_attr_initialiation_ops(
    blocks: list[BasicBlock],
    self_reg: Register,
    maybe_defined: AnalysisResult[str],
    dirty: AnalysisResult[None],
) -> None:
    """Tag all SetAttr ops in the basic blocks that initialize attributes.

    Initialization ops assume that the previous attribute value is the
    error value, so there's no need to decref or check for definedness.

    A SetAttr on 'self_reg' is tagged only if the attribute cannot have
    been defined before the op and the state after the op is not dirty.
    """
    for block in blocks:
        for idx, op in enumerate(block.ops):
            if not (isinstance(op, SetAttr) and op.obj is self_reg):
                continue
            if op.attr in maybe_defined.before[block, idx]:
                # May already hold a value, so this isn't an initialization.
                continue
            if dirty.after[block, idx]:
                continue
            op.mark_as_initializer()
# Alias for a pair of attribute-name sets. NOTE(review): presumably the
# (gen, kill) pair returned by the visitor methods below; the alias is not
# referenced in the visible code -- confirm before relying on or removing it.
GenAndKill = Tuple[Set[str], Set[str]]
def attributes_initialized_by_init_call(op: Call) -> set[str]:
    """Calculate attributes that are always initialized by a super().__init__ call.

    The class is taken from the type of the first argument in the called
    function's signature. The result contains every attribute declared by
    a class in that class's MRO which that same class reports as always
    defined.
    """
    receiver_type = op.fn.sig.args[0].type
    assert isinstance(receiver_type, RInstance)
    initialized: set[str] = set()
    for base in receiver_type.class_ir.mro:
        for name in base.attributes:
            if base.is_always_defined(name):
                initialized.add(name)
    return initialized
def attributes_maybe_initialized_by_init_call(op: Call) -> set[str]:
    """Calculate attributes that may be initialized by a super().__init__ call.

    This is the union of the attributes the call always initializes and
    the _sometimes_initialized_attrs recorded on the class of the called
    function's first argument.
    """
    receiver_type = op.fn.sig.args[0].type
    assert isinstance(receiver_type, RInstance)
    target_class = receiver_type.class_ir
    definitely = attributes_initialized_by_init_call(op)
    return definitely | target_class._sometimes_initialized_attrs
class AttributeMaybeDefinedVisitor(BaseAnalysisVisitor[str]):
    """Find attributes that may have been defined via some code path.

    Consider initializations in class body and assignments to 'self.x'
    and calls to base class '__init__'.

    Every visit method returns a (gen, kill) pair of attribute name sets.
    This visitor never kills anything: the second set is always empty.
    """

    def __init__(self, self_reg: Register) -> None:
        # Register holding 'self'; only SetAttr ops on it are tracked.
        self.self_reg = self_reg

    def visit_register_op(self, op: RegisterOp) -> tuple[set[str], set[str]]:
        """Generate attributes defined by a SetAttr on self or an __init__ call."""
        gen: set[str] = set()
        if isinstance(op, SetAttr) and op.obj is self.self_reg:
            gen = {op.attr}
        elif isinstance(op, Call) and op.fn.class_name and op.fn.name == "__init__":
            gen = attributes_maybe_initialized_by_init_call(op)
        return gen, set()

    # The remaining ops have no effect on which attributes may be defined.

    def visit_branch(self, op: Branch) -> tuple[set[str], set[str]]:
        return set(), set()

    def visit_return(self, op: Return) -> tuple[set[str], set[str]]:
        return set(), set()

    def visit_unreachable(self, op: Unreachable) -> tuple[set[str], set[str]]:
        return set(), set()

    def visit_assign(self, op: Assign) -> tuple[set[str], set[str]]:
        return set(), set()

    def visit_assign_multi(self, op: AssignMulti) -> tuple[set[str], set[str]]:
        return set(), set()

    def visit_set_mem(self, op: SetMem) -> tuple[set[str], set[str]]:
        return set(), set()
def analyze_maybe_defined_attrs_in_init(
    blocks: list[BasicBlock], self_reg: Register, attrs_with_defaults: set[str], cfg: CFG
) -> AnalysisResult[str]:
    """Run the "maybe defined" attribute analysis over the given blocks.

    This is a forward MAYBE_ANALYSIS that starts from 'attrs_with_defaults'
    and uses AttributeMaybeDefinedVisitor for the per-op gen/kill sets.
    """
    visitor = AttributeMaybeDefinedVisitor(self_reg)
    return run_analysis(
        blocks=blocks,
        cfg=cfg,
        gen_and_kill=visitor,
        initial=attrs_with_defaults,
        backward=False,
        kind=MAYBE_ANALYSIS,
    )
class AttributeMaybeUndefinedVisitor(BaseAnalysisVisitor[str]):
    """Find attributes that may be undefined via some code path.

    Consider initializations in class body, assignments to 'self.x'
    and calls to base class '__init__'.

    Every visit method returns a (gen, kill) pair of attribute name sets.
    This visitor never generates anything: the first set is always empty,
    and defining an attribute kills its "maybe undefined" state.
    """

    def __init__(self, self_reg: Register) -> None:
        # Register holding 'self'; only SetAttr ops on it are tracked.
        self.self_reg = self_reg

    def visit_register_op(self, op: RegisterOp) -> tuple[set[str], set[str]]:
        """Kill attributes defined by a SetAttr on self or an __init__ call."""
        kill: set[str] = set()
        if isinstance(op, SetAttr) and op.obj is self.self_reg:
            kill = {op.attr}
        elif isinstance(op, Call) and op.fn.class_name and op.fn.name == "__init__":
            # Only attributes the call *always* initializes are killed.
            kill = attributes_initialized_by_init_call(op)
        return set(), kill

    # The remaining ops have no effect on which attributes may be undefined.

    def visit_branch(self, op: Branch) -> tuple[set[str], set[str]]:
        return set(), set()

    def visit_return(self, op: Return) -> tuple[set[str], set[str]]:
        return set(), set()

    def visit_unreachable(self, op: Unreachable) -> tuple[set[str], set[str]]:
        return set(), set()

    def visit_assign(self, op: Assign) -> tuple[set[str], set[str]]:
        return set(), set()

    def visit_assign_multi(self, op: AssignMulti) -> tuple[set[str], set[str]]:
        return set(), set()

    def visit_set_mem(self, op: SetMem) -> tuple[set[str], set[str]]:
        return set(), set()
def analyze_maybe_undefined_attrs_in_init(
    blocks: list[BasicBlock], self_reg: Register, initial_undefined: set[str], cfg: CFG
) -> AnalysisResult[str]:
    """Run the "maybe undefined" attribute analysis over the given blocks.

    This is a forward MAYBE_ANALYSIS that starts from 'initial_undefined'
    and uses AttributeMaybeUndefinedVisitor for the per-op gen/kill sets.
    """
    visitor = AttributeMaybeUndefinedVisitor(self_reg)
    return run_analysis(
        blocks=blocks,
        cfg=cfg,
        gen_and_kill=visitor,
        initial=initial_undefined,
        backward=False,
        kind=MAYBE_ANALYSIS,
    )
def update_always_defined_attrs_using_subclasses(cl: ClassIR, seen: set[ClassIR]) -> None:
    """Remove attributes not defined in all subclasses from always defined attrs.

    Classes already in 'seen', and classes whose subclasses are unknown
    (children is None), are left untouched. A processed class is added to
    'seen' only once it has been updated.
    """
    if cl in seen:
        return
    children = cl.children
    if children is None:
        # Subclasses are unknown
        return

    # Children are brought up to date first, but only when there is at
    # least one attribute of this class to check against them.
    if cl._always_initialized_attrs:
        for child in children:
            update_always_defined_attrs_using_subclasses(child, seen)

    missing_in_child = {
        attr
        for attr in cl._always_initialized_attrs
        for child in children
        if attr not in child._always_initialized_attrs
    }
    cl._always_initialized_attrs -= missing_in_child
    seen.add(cl)
def detect_undefined_bitmap(cl: ClassIR, seen: set[ClassIR]) -> None:
    """Populate cl.bitmap_attrs with attributes that need a definedness bitmap.

    Traits and classes already in 'seen' are skipped. Otherwise the
    non-trait bases of 'cl' are processed first, so that the bitmap
    attributes inherited from the immediate base are complete before they
    are copied. The result is, in order:

    1. the bitmap attributes of the immediate base (cl.base_mro[1]), if any;
    2. attributes declared by 'cl' whose type has error_overlap set and
       which are not always defined;
    3. such attributes declared by traits in the MRO of 'cl' that are not
       already in the list.
    """
    if cl.is_trait:
        return
    if cl in seen:
        return
    seen.add(cl)

    # Process the bases themselves. Recursing on 'cl' here would be a no-op
    # (it is already in 'seen'), and a subclass visited before its base would
    # then copy a still-empty bitmap_attrs list from that base below.
    for base in cl.base_mro[1:]:
        detect_undefined_bitmap(base, seen)

    if len(cl.base_mro) > 1:
        cl.bitmap_attrs.extend(cl.base_mro[1].bitmap_attrs)
    for name, rtype in cl.attributes.items():
        if rtype.error_overlap and not cl.is_always_defined(name):
            cl.bitmap_attrs.append(name)

    # Trait attributes are not covered by base_mro, so handle them here,
    # taking care not to add the same name twice.
    for base in cl.mro[1:]:
        if not base.is_trait:
            continue
        for name, rtype in base.attributes.items():
            if (
                rtype.error_overlap
                and not cl.is_always_defined(name)
                and name not in cl.bitmap_attrs
            ):
                cl.bitmap_attrs.append(name)