"""Always defined attribute analysis. An always defined attribute has some statements in __init__ or the class body that cause the attribute to be always initialized when an instance is constructed. It must also not be possible to read the attribute before initialization, and it can't be deletable. We can assume that the value is always defined when reading an always defined attribute. Otherwise we'll need to raise AttributeError if the value is undefined (i.e. has the error value). We use data flow analysis to figure out attributes that are always defined. Example: class C: def __init__(self) -> None: self.x = 0 if func(): self.y = 1 else: self.y = 2 self.z = 3 In this example, the attributes 'x' and 'y' are always defined, but 'z' is not. The analysis assumes that we know that there won't be any subclasses. The analysis also works if there is a known, closed set of subclasses. An attribute defined in a base class can only be always defined if it's also always defined in all subclasses. As soon as __init__ contains an op that can 'leak' self to another function, we will stop inferring always defined attributes, since the analysis is mostly intra-procedural and only looks at __init__ methods. The called code could read an uninitialized attribute. Example: class C: def __init__(self) -> None: self.x = self.foo() def foo(self) -> int: ... Now we won't infer 'x' as always defined, since 'foo' might read 'x' before initialization. As an exception to the above limitation, we perform inter-procedural analysis of super().__init__ calls, since these are very common. Our analysis is somewhat optimistic. We assume that nobody calls a method of a partially uninitialized object through gc.get_objects(), in particular. Code like this could potentially cause a segfault with a null pointer dereference. This seems very unlikely to be an issue in practice, however. Accessing an attribute via getattr always checks for undefined attributes and thus works if the object is partially uninitialized. This can be used as a workaround if somebody ever needs to inspect partially uninitialized objects via gc.get_objects(). The analysis runs after IR building as a separate pass. Since we only run this on __init__ methods, this analysis pass will be fairly quick. """ from __future__ import annotations from typing import Final, Set, Tuple from mypyc.analysis.dataflow import ( CFG, MAYBE_ANALYSIS, AnalysisResult, BaseAnalysisVisitor, get_cfg, run_analysis, ) from mypyc.analysis.selfleaks import analyze_self_leaks from mypyc.ir.class_ir import ClassIR from mypyc.ir.ops import ( Assign, AssignMulti, BasicBlock, Branch, Call, ControlOp, GetAttr, Register, RegisterOp, Return, SetAttr, SetMem, Unreachable, ) from mypyc.ir.rtypes import RInstance # If True, print out all always-defined attributes of native classes (to aid # debugging and testing) dump_always_defined: Final = False def analyze_always_defined_attrs(class_irs: list[ClassIR]) -> None: """Find always defined attributes all classes of a compilation unit. Also tag attribute initialization ops to not decref the previous value (as this would read a NULL pointer and segfault). Update the _always_initialized_attrs, _sometimes_initialized_attrs and init_self_leak attributes in ClassIR instances. This is the main entry point. """ seen: set[ClassIR] = set() # First pass: only look at target class and classes in MRO for cl in class_irs: analyze_always_defined_attrs_in_class(cl, seen) # Second pass: look at all derived class seen = set() for cl in class_irs: update_always_defined_attrs_using_subclasses(cl, seen) # Final pass: detect attributes that need to use a bitmap to track definedness seen = set() for cl in class_irs: detect_undefined_bitmap(cl, seen) def analyze_always_defined_attrs_in_class(cl: ClassIR, seen: set[ClassIR]) -> None: if cl in seen: return seen.add(cl) if ( cl.is_trait or cl.inherits_python or cl.allow_interpreted_subclasses or cl.builtin_base is not None or cl.children is None or cl.is_serializable() ): # Give up -- we can't enforce that attributes are always defined. return # First analyze all base classes. Track seen classes to avoid duplicate work. for base in cl.mro[1:]: analyze_always_defined_attrs_in_class(base, seen) m = cl.get_method("__init__") if m is None: cl._always_initialized_attrs = cl.attrs_with_defaults.copy() cl._sometimes_initialized_attrs = cl.attrs_with_defaults.copy() return self_reg = m.arg_regs[0] cfg = get_cfg(m.blocks) dirty = analyze_self_leaks(m.blocks, self_reg, cfg) maybe_defined = analyze_maybe_defined_attrs_in_init( m.blocks, self_reg, cl.attrs_with_defaults, cfg ) all_attrs: set[str] = set() for base in cl.mro: all_attrs.update(base.attributes) maybe_undefined = analyze_maybe_undefined_attrs_in_init( m.blocks, self_reg, initial_undefined=all_attrs - cl.attrs_with_defaults, cfg=cfg ) always_defined = find_always_defined_attributes( m.blocks, self_reg, all_attrs, maybe_defined, maybe_undefined, dirty ) always_defined = {a for a in always_defined if not cl.is_deletable(a)} cl._always_initialized_attrs = always_defined if dump_always_defined: print(cl.name, sorted(always_defined)) cl._sometimes_initialized_attrs = find_sometimes_defined_attributes( m.blocks, self_reg, maybe_defined, dirty ) mark_attr_initialiation_ops(m.blocks, self_reg, maybe_defined, dirty) # Check if __init__ can run unpredictable code (leak 'self'). any_dirty = False for b in m.blocks: for i, op in enumerate(b.ops): if dirty.after[b, i] and not isinstance(op, Return): any_dirty = True break cl.init_self_leak = any_dirty def find_always_defined_attributes( blocks: list[BasicBlock], self_reg: Register, all_attrs: set[str], maybe_defined: AnalysisResult[str], maybe_undefined: AnalysisResult[str], dirty: AnalysisResult[None], ) -> set[str]: """Find attributes that are always initialized in some basic blocks. The analysis results are expected to be up-to-date for the blocks. Return a set of always defined attributes. """ attrs = all_attrs.copy() for block in blocks: for i, op in enumerate(block.ops): # If an attribute we *read* may be undefined, it isn't always defined. if isinstance(op, GetAttr) and op.obj is self_reg: if op.attr in maybe_undefined.before[block, i]: attrs.discard(op.attr) # If an attribute we *set* may be sometimes undefined and # sometimes defined, don't consider it always defined. Unlike # the get case, it's fine for the attribute to be undefined. # The set operation will then be treated as initialization. if isinstance(op, SetAttr) and op.obj is self_reg: if ( op.attr in maybe_undefined.before[block, i] and op.attr in maybe_defined.before[block, i] ): attrs.discard(op.attr) # Treat an op that might run arbitrary code as an "exit" # in terms of the analysis -- we can't do any inference # afterwards reliably. if dirty.after[block, i]: if not dirty.before[block, i]: attrs = attrs & ( maybe_defined.after[block, i] - maybe_undefined.after[block, i] ) break if isinstance(op, ControlOp): for target in op.targets(): # Gotos/branches can also be "exits". if not dirty.after[block, i] and dirty.before[target, 0]: attrs = attrs & ( maybe_defined.after[target, 0] - maybe_undefined.after[target, 0] ) return attrs def find_sometimes_defined_attributes( blocks: list[BasicBlock], self_reg: Register, maybe_defined: AnalysisResult[str], dirty: AnalysisResult[None], ) -> set[str]: """Find attributes that are sometimes initialized in some basic blocks.""" attrs: set[str] = set() for block in blocks: for i, op in enumerate(block.ops): # Only look at possibly defined attributes at exits. if dirty.after[block, i]: if not dirty.before[block, i]: attrs = attrs | maybe_defined.after[block, i] break if isinstance(op, ControlOp): for target in op.targets(): if not dirty.after[block, i] and dirty.before[target, 0]: attrs = attrs | maybe_defined.after[target, 0] return attrs def mark_attr_initialiation_ops( blocks: list[BasicBlock], self_reg: Register, maybe_defined: AnalysisResult[str], dirty: AnalysisResult[None], ) -> None: """Tag all SetAttr ops in the basic blocks that initialize attributes. Initialization ops assume that the previous attribute value is the error value, so there's no need to decref or check for definedness. """ for block in blocks: for i, op in enumerate(block.ops): if isinstance(op, SetAttr) and op.obj is self_reg: attr = op.attr if attr not in maybe_defined.before[block, i] and not dirty.after[block, i]: op.mark_as_initializer() GenAndKill = Tuple[Set[str], Set[str]] def attributes_initialized_by_init_call(op: Call) -> set[str]: """Calculate attributes that are always initialized by a super().__init__ call.""" self_type = op.fn.sig.args[0].type assert isinstance(self_type, RInstance) cl = self_type.class_ir return {a for base in cl.mro for a in base.attributes if base.is_always_defined(a)} def attributes_maybe_initialized_by_init_call(op: Call) -> set[str]: """Calculate attributes that may be initialized by a super().__init__ call.""" self_type = op.fn.sig.args[0].type assert isinstance(self_type, RInstance) cl = self_type.class_ir return attributes_initialized_by_init_call(op) | cl._sometimes_initialized_attrs class AttributeMaybeDefinedVisitor(BaseAnalysisVisitor[str]): """Find attributes that may have been defined via some code path. Consider initializations in class body and assignments to 'self.x' and calls to base class '__init__'. """ def __init__(self, self_reg: Register) -> None: self.self_reg = self_reg def visit_branch(self, op: Branch) -> tuple[set[str], set[str]]: return set(), set() def visit_return(self, op: Return) -> tuple[set[str], set[str]]: return set(), set() def visit_unreachable(self, op: Unreachable) -> tuple[set[str], set[str]]: return set(), set() def visit_register_op(self, op: RegisterOp) -> tuple[set[str], set[str]]: if isinstance(op, SetAttr) and op.obj is self.self_reg: return {op.attr}, set() if isinstance(op, Call) and op.fn.class_name and op.fn.name == "__init__": return attributes_maybe_initialized_by_init_call(op), set() return set(), set() def visit_assign(self, op: Assign) -> tuple[set[str], set[str]]: return set(), set() def visit_assign_multi(self, op: AssignMulti) -> tuple[set[str], set[str]]: return set(), set() def visit_set_mem(self, op: SetMem) -> tuple[set[str], set[str]]: return set(), set() def analyze_maybe_defined_attrs_in_init( blocks: list[BasicBlock], self_reg: Register, attrs_with_defaults: set[str], cfg: CFG ) -> AnalysisResult[str]: return run_analysis( blocks=blocks, cfg=cfg, gen_and_kill=AttributeMaybeDefinedVisitor(self_reg), initial=attrs_with_defaults, backward=False, kind=MAYBE_ANALYSIS, ) class AttributeMaybeUndefinedVisitor(BaseAnalysisVisitor[str]): """Find attributes that may be undefined via some code path. Consider initializations in class body, assignments to 'self.x' and calls to base class '__init__'. """ def __init__(self, self_reg: Register) -> None: self.self_reg = self_reg def visit_branch(self, op: Branch) -> tuple[set[str], set[str]]: return set(), set() def visit_return(self, op: Return) -> tuple[set[str], set[str]]: return set(), set() def visit_unreachable(self, op: Unreachable) -> tuple[set[str], set[str]]: return set(), set() def visit_register_op(self, op: RegisterOp) -> tuple[set[str], set[str]]: if isinstance(op, SetAttr) and op.obj is self.self_reg: return set(), {op.attr} if isinstance(op, Call) and op.fn.class_name and op.fn.name == "__init__": return set(), attributes_initialized_by_init_call(op) return set(), set() def visit_assign(self, op: Assign) -> tuple[set[str], set[str]]: return set(), set() def visit_assign_multi(self, op: AssignMulti) -> tuple[set[str], set[str]]: return set(), set() def visit_set_mem(self, op: SetMem) -> tuple[set[str], set[str]]: return set(), set() def analyze_maybe_undefined_attrs_in_init( blocks: list[BasicBlock], self_reg: Register, initial_undefined: set[str], cfg: CFG ) -> AnalysisResult[str]: return run_analysis( blocks=blocks, cfg=cfg, gen_and_kill=AttributeMaybeUndefinedVisitor(self_reg), initial=initial_undefined, backward=False, kind=MAYBE_ANALYSIS, ) def update_always_defined_attrs_using_subclasses(cl: ClassIR, seen: set[ClassIR]) -> None: """Remove attributes not defined in all subclasses from always defined attrs.""" if cl in seen: return if cl.children is None: # Subclasses are unknown return removed = set() for attr in cl._always_initialized_attrs: for child in cl.children: update_always_defined_attrs_using_subclasses(child, seen) if attr not in child._always_initialized_attrs: removed.add(attr) cl._always_initialized_attrs -= removed seen.add(cl) def detect_undefined_bitmap(cl: ClassIR, seen: set[ClassIR]) -> None: if cl.is_trait: return if cl in seen: return seen.add(cl) for base in cl.base_mro[1:]: detect_undefined_bitmap(cl, seen) if len(cl.base_mro) > 1: cl.bitmap_attrs.extend(cl.base_mro[1].bitmap_attrs) for n, t in cl.attributes.items(): if t.error_overlap and not cl.is_always_defined(n): cl.bitmap_attrs.append(n) for base in cl.mro[1:]: if base.is_trait: for n, t in base.attributes.items(): if t.error_overlap and not cl.is_always_defined(n) and n not in cl.bitmap_attrs: cl.bitmap_attrs.append(n)