Tipragot
628be439b8
This avoids compatibility problems, because Python is tracked in the git repository.
1132 lines
44 KiB
Python
1132 lines
44 KiB
Python
"""Generate C code for a Python C extension module from Python source code."""
|
|
|
|
# FIXME: Basically nothing in this file operates on the level of a
|
|
# single module and it should be renamed.
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
from typing import Iterable, List, Optional, Tuple, TypeVar
|
|
|
|
from mypy.build import (
|
|
BuildResult,
|
|
BuildSource,
|
|
State,
|
|
build,
|
|
compute_hash,
|
|
create_metastore,
|
|
get_cache_names,
|
|
sorted_components,
|
|
)
|
|
from mypy.errors import CompileError
|
|
from mypy.fscache import FileSystemCache
|
|
from mypy.nodes import MypyFile
|
|
from mypy.options import Options
|
|
from mypy.plugin import Plugin, ReportConfigContext
|
|
from mypy.util import hash_digest
|
|
from mypyc.codegen.cstring import c_string_initializer
|
|
from mypyc.codegen.emit import Emitter, EmitterContext, HeaderDeclaration, c_array_initializer
|
|
from mypyc.codegen.emitclass import generate_class, generate_class_type_decl
|
|
from mypyc.codegen.emitfunc import generate_native_function, native_function_header
|
|
from mypyc.codegen.emitwrapper import (
|
|
generate_legacy_wrapper_function,
|
|
generate_wrapper_function,
|
|
legacy_wrapper_function_header,
|
|
wrapper_function_header,
|
|
)
|
|
from mypyc.codegen.literals import Literals
|
|
from mypyc.common import (
|
|
MODULE_PREFIX,
|
|
PREFIX,
|
|
RUNTIME_C_FILES,
|
|
TOP_LEVEL_NAME,
|
|
shared_lib_name,
|
|
short_id_from_name,
|
|
use_vectorcall,
|
|
)
|
|
from mypyc.errors import Errors
|
|
from mypyc.ir.class_ir import ClassIR
|
|
from mypyc.ir.func_ir import FuncIR
|
|
from mypyc.ir.module_ir import ModuleIR, ModuleIRs, deserialize_modules
|
|
from mypyc.ir.ops import DeserMaps, LoadLiteral
|
|
from mypyc.ir.rtypes import RType
|
|
from mypyc.irbuild.main import build_ir
|
|
from mypyc.irbuild.mapper import Mapper
|
|
from mypyc.irbuild.prepare import load_type_map
|
|
from mypyc.namegen import NameGenerator, exported_name
|
|
from mypyc.options import CompilerOptions
|
|
from mypyc.transform.exceptions import insert_exception_handling
|
|
from mypyc.transform.refcount import insert_ref_count_opcodes
|
|
from mypyc.transform.uninit import insert_uninit_checks
|
|
|
|
# All of the modules being compiled are divided into "groups". A group
|
|
# is a set of modules that are placed into the same shared library.
|
|
# Two common configurations are that every module is placed in a group
|
|
# by itself (fully separate compilation) and that every module is
|
|
# placed in the same group (fully whole-program compilation), but we
|
|
# support finer-grained control of the group as well.
|
|
#
|
|
# In fully whole-program compilation, we will generate N+1 extension
|
|
# modules: one shim per module and one shared library containing all
|
|
# the actual code.
|
|
# In fully separate compilation, we (unfortunately) will generate 2*N
|
|
# extension modules: one shim per module and also one library containing
|
|
# each module's actual code. (This might be fixable in the future,
|
|
# but allows a clean separation between setup of the export tables
|
|
# (see generate_export_table) and running module top levels.)
|
|
#
|
|
# A group is represented as a list of BuildSources containing all of
|
|
# its modules along with the name of the group. (Which can be None
|
|
# only if we are compiling only a single group with a single file in it
|
|
# and not using shared libraries).
|
|
# A group: the BuildSources of all its modules plus the group name.
# The name may be None only when compiling a single group containing a
# single file without shared libraries (see the comment block above).
Group = Tuple[List[BuildSource], Optional[str]]
Groups = List[Group]

# A list of (file name, file contents) pairs.
FileContents = List[Tuple[str, str]]
|
|
|
|
|
|
class MarkedDeclaration:
    """Add a mark, useful for topological sort.

    Pairs a HeaderDeclaration with a mutable visited flag.
    """

    def __init__(self, declaration: HeaderDeclaration, mark: bool) -> None:
        self.declaration = declaration
        # Fix: honor the caller-supplied initial mark instead of always
        # forcing it to False (the `mark` parameter was previously unused).
        self.mark = mark
|
|
|
|
|
|
class MypycPlugin(Plugin):
    """Plugin for making mypyc interoperate properly with mypy incremental mode.

    Basically the point of this plugin is to force mypy to recheck things
    based on the demands of mypyc in a couple situations:
    * Any modules in the same group must be compiled together, so we
      tell mypy that modules depend on all their groupmates.
    * If the IR metadata is missing or stale, or any of the generated
      C source files associated with it are missing or stale, then we
      need to recompile the module, so we mark it as stale.
    """

    def __init__(
        self, options: Options, compiler_options: CompilerOptions, groups: Groups
    ) -> None:
        super().__init__(options)
        # Maps each module id to (its group name, all module ids in that group).
        self.group_map: dict[str, tuple[str | None, list[str]]] = {}
        for sources, name in groups:
            modules = sorted(source.module for source in sources)
            for id in modules:
                self.group_map[id] = (name, modules)

        self.compiler_options = compiler_options
        self.metastore = create_metastore(options)

    def report_config_data(self, ctx: ReportConfigContext) -> tuple[str | None, list[str]] | None:
        # The config data we report is the group map entry for the module.
        # If the data is being used to check validity, we do additional checks
        # that the IR cache exists and matches the metadata cache and all
        # output source files exist and are up to date.

        id, path, is_check = ctx.id, ctx.path, ctx.is_check

        # Modules outside every group are of no interest to mypyc.
        if id not in self.group_map:
            return None

        # If we aren't doing validity checks, just return the cache data
        if not is_check:
            return self.group_map[id]

        # Load the metadata and IR cache
        meta_path, _, _ = get_cache_names(id, path, self.options)
        ir_path = get_ir_cache_name(id, path, self.options)
        try:
            meta_json = self.metastore.read(meta_path)
            ir_json = self.metastore.read(ir_path)
        except FileNotFoundError:
            # This could happen if mypyc failed after mypy succeeded
            # in the previous run or if some cache files got
            # deleted. No big deal, just fail to load the cache.
            return None

        ir_data = json.loads(ir_json)

        # Check that the IR cache matches the metadata cache
        if compute_hash(meta_json) != ir_data["meta_hash"]:
            return None

        # Check that all of the source files are present and as
        # expected. The main situation where this would come up is the
        # user deleting the build directory without deleting
        # .mypy_cache, which we should handle gracefully.
        for path, hash in ir_data["src_hashes"].items():
            try:
                with open(os.path.join(self.compiler_options.target_dir, path), "rb") as f:
                    contents = f.read()
            except FileNotFoundError:
                return None
            real_hash = hash_digest(contents)
            if hash != real_hash:
                return None

        return self.group_map[id]

    def get_additional_deps(self, file: MypyFile) -> list[tuple[int, str, int]]:
        # Report dependency on modules in the module's group.
        # Tuples are (priority, module id, line); -1 means no line.
        # NOTE(review): priority 10 presumably matches a mypy.build dep
        # priority constant — confirm against mypy's build module.
        return [(10, id, -1) for id in self.group_map.get(file.fullname, (None, []))[1]]
|
|
|
|
|
|
def parse_and_typecheck(
    sources: list[BuildSource],
    options: Options,
    compiler_options: CompilerOptions,
    groups: Groups,
    fscache: FileSystemCache | None = None,
    alt_lib_path: str | None = None,
) -> BuildResult:
    """Run the mypy front-end over the given sources with the mypyc plugin installed.

    Raises CompileError if mypy reported any errors; otherwise returns the
    BuildResult.
    """
    assert options.strict_optional, "strict_optional must be turned on"
    # The plugin keeps mypy's incremental mode in sync with mypyc's caches.
    plugin = MypycPlugin(options, compiler_options, groups)
    result = build(
        sources=sources,
        options=options,
        alt_lib_path=alt_lib_path,
        fscache=fscache,
        extra_plugins=[plugin],
    )
    if result.errors:
        raise CompileError(result.errors)
    return result
|
|
|
|
|
|
def compile_scc_to_ir(
    scc: list[MypyFile],
    result: BuildResult,
    mapper: Mapper,
    compiler_options: CompilerOptions,
    errors: Errors,
) -> ModuleIRs:
    """Compile an SCC into ModuleIRs.

    Any modules that this SCC depends on must have either been compiled or
    loaded from a cache into mapper.

    Arguments:
        scc: The list of MypyFiles to compile
        result: The BuildResult from the mypy front-end
        mapper: The Mapper object mapping mypy ASTs to class and func IRs
        compiler_options: The compilation options
        errors: Where to report any errors encountered

    Returns the IR of the modules.
    """
    if compiler_options.verbose:
        print("Compiling {}".format(", ".join(x.name for x in scc)))

    # Generate basic IR, with missing exception and refcount handling.
    modules = build_ir(scc, result.graph, result.types, mapper, compiler_options, errors)
    if errors.num_errors > 0:
        return modules

    # Run each IR transform pass over every function of every module before
    # moving on to the next pass: uninit checks, then exception handling,
    # then refcount handling.
    passes = (insert_uninit_checks, insert_exception_handling, insert_ref_count_opcodes)
    for apply_pass in passes:
        for module_ir in modules.values():
            for func_ir in module_ir.functions:
                apply_pass(func_ir)

    return modules
|
|
|
|
|
|
def compile_modules_to_ir(
    result: BuildResult, mapper: Mapper, compiler_options: CompilerOptions, errors: Errors
) -> ModuleIRs:
    """Compile a collection of modules into ModuleIRs.

    The modules to compile are specified as part of mapper's group_map.

    Returns the IR of the modules that were (re)compiled this run; modules
    whose SCC was fresh are loaded from cache and not included.
    """
    deser_ctx = DeserMaps({}, {})
    modules = {}

    # Process the graph by SCC in topological order, like we do in mypy.build
    for scc in sorted_components(result.graph):
        scc_states = [result.graph[id] for id in scc]
        # Keep only the modules that belong to some compilation group and
        # actually have a parsed tree.
        trees = [st.tree for st in scc_states if st.id in mapper.group_map and st.tree]

        if not trees:
            continue

        # An SCC is "fresh" if mypy did not recheck any module in it.
        fresh = all(id not in result.manager.rechecked_modules for id in scc)
        if fresh:
            # The return value is deliberately discarded: a fresh SCC is
            # loaded only for its side effects on `mapper` (via
            # load_type_map) and `deser_ctx`, so later SCCs can compile
            # against it. Its generated C is already up to date.
            load_scc_from_cache(trees, result, mapper, deser_ctx)
        else:
            scc_ir = compile_scc_to_ir(trees, result, mapper, compiler_options, errors)
            modules.update(scc_ir)

    return modules
|
|
|
|
|
|
def compile_ir_to_c(
    groups: Groups,
    modules: ModuleIRs,
    result: BuildResult,
    mapper: Mapper,
    compiler_options: CompilerOptions,
) -> dict[str | None, list[tuple[str, str]]]:
    """Compile a collection of ModuleIRs to C source text.

    Returns a dictionary mapping group names to a list of (file name,
    file text) pairs.
    """
    # Map every module (across all groups) to its source path.
    source_paths = {}
    for sources, _ in groups:
        for source in sources:
            source_paths[source.module] = result.graph[source.module].xpath

    names = NameGenerator([[source.module for source in sources] for sources, _ in groups])

    # Generate C code for each compilation group. Each group will be
    # compiled into a separate extension module.
    ctext: dict[str | None, list[tuple[str, str]]] = {}
    for group_sources, group_name in groups:
        # Restrict to the group's modules that actually have IR.
        group_modules = {}
        for source in group_sources:
            ir = modules.get(source.module)
            if ir is not None:
                group_modules[source.module] = ir

        if not group_modules:
            ctext[group_name] = []
            continue

        generator = GroupGenerator(
            group_modules, source_paths, group_name, mapper.group_map, names, compiler_options
        )
        ctext[group_name] = generator.generate_c_for_modules()

    return ctext
|
|
|
|
|
|
def get_ir_cache_name(id: str, path: str, options: Options) -> str:
    """Return the mypyc IR cache path derived from a module's mypy meta path."""
    cache_names = get_cache_names(id, path, options)
    meta_path = cache_names[0]
    return meta_path.replace(".meta.json", ".ir.json")
|
|
|
|
|
|
def get_state_ir_cache_name(state: State) -> str:
    """Return the mypyc IR cache path for a mypy build State."""
    id, path, options = state.id, state.xpath, state.options
    return get_ir_cache_name(id, path, options)
|
|
|
|
|
|
def write_cache(
    modules: ModuleIRs,
    result: BuildResult,
    group_map: dict[str, str | None],
    ctext: dict[str | None, list[tuple[str, str]]],
) -> None:
    """Write out the cache information for modules.

    Each module has the following cache information written (which is
    in addition to the cache information written by mypy itself):
    * A serialized version of its mypyc IR, minus the bodies of
      functions. This allows code that depends on it to use
      these serialized data structures when compiling against it
      instead of needing to recompile it. (Compiling against a
      module requires access to both its mypy and mypyc data
      structures.)
    * The hash of the mypy metadata cache file for the module.
      This is used to ensure that the mypyc cache and the mypy
      cache are in sync and refer to the same version of the code.
      This is particularly important if mypyc crashes/errors/is
      stopped after mypy has written its cache but before mypyc has.
    * The hashes of all of the source file outputs for the group
      the module is in. This is so that the module will be
      recompiled if the source outputs are missing.
    """
    # Hash every generated output file, keyed by group name.
    hashes = {
        group: {file: compute_hash(data) for file, data in files}
        for group, files in ctext.items()
    }

    # Write out cache data for each compiled module.
    manager = result.manager
    for id, module in modules.items():
        st = result.graph[id]

        meta_path, _, _ = get_cache_names(id, st.xpath, manager.options)
        # If the metadata isn't there, skip writing the cache.
        try:
            meta_data = manager.metastore.read(meta_path)
        except OSError:
            continue

        ir_data = {
            "ir": module.serialize(),
            "meta_hash": compute_hash(meta_data),
            "src_hashes": hashes[group_map[id]],
        }
        newpath = get_state_ir_cache_name(st)
        manager.metastore.write(newpath, json.dumps(ir_data, separators=(",", ":")))

    manager.metastore.commit()
|
|
|
|
|
|
def load_scc_from_cache(
    scc: list[MypyFile], result: BuildResult, mapper: Mapper, ctx: DeserMaps
) -> ModuleIRs:
    """Load IR for an SCC of modules from the cache.

    Arguments and return are as compile_scc_to_ir.
    """
    # Read the serialized IR for every module in the SCC.
    cache_data = {}
    for tree in scc:
        st = result.graph[tree.fullname]
        raw = result.manager.metastore.read(get_state_ir_cache_name(st))
        cache_data[tree.fullname] = json.loads(raw)["ir"]

    modules = deserialize_modules(cache_data, ctx)
    # Populate the mapper so later SCCs can compile against these modules.
    load_type_map(mapper, scc, ctx)
    return modules
|
|
|
|
|
|
def compile_modules_to_c(
    result: BuildResult, compiler_options: CompilerOptions, errors: Errors, groups: Groups
) -> tuple[ModuleIRs, list[FileContents]]:
    """Compile Python module(s) to the source of Python C extension modules.

    This generates the source code for the "shared library" module
    for each group. The shim modules are generated in mypyc.build.
    Each shared library module provides, for each module in its group,
    a PyCapsule containing an initialization function.
    Additionally, it provides a capsule containing an export table of
    pointers to all of the group's functions and static variables.

    Arguments:
        result: The BuildResult from the mypy front-end
        compiler_options: The compilation options
        errors: Where to report any errors encountered
        groups: The groups that we are compiling. See documentation of Groups type above.

    Returns the IR of the modules and a list containing the generated files for each group.
    """
    # Construct a map from modules to what group they belong to.
    group_map = {}
    for group_sources, lib_name in groups:
        for source in group_sources:
            group_map[source.module] = lib_name
    mapper = Mapper(group_map)

    # Sometimes when we call back into mypy, there might be errors.
    # We don't want to crash when that happens.
    result.manager.errors.set_file(
        "<mypyc>", module=None, scope=None, options=result.manager.options
    )

    modules = compile_modules_to_ir(result, mapper, compiler_options, errors)
    ctext = compile_ir_to_c(groups, modules, result, mapper, compiler_options)

    # Only persist caches for a fully clean compilation.
    if errors.num_errors == 0:
        write_cache(modules, result, group_map, ctext)

    group_files = [ctext[group_name] for _, group_name in groups]
    return modules, group_files
|
|
|
|
|
|
def generate_function_declaration(fn: FuncIR, emitter: Emitter) -> None:
    """Register header declarations for a native function and its Python wrapper."""
    decls = emitter.context.declarations
    # The native entry point is always declared and exported.
    decls[emitter.native_function_name(fn.decl)] = HeaderDeclaration(
        f"{native_function_header(fn.decl, emitter)};", needs_export=True
    )
    # The module top level has no Python-visible wrapper.
    if fn.name == TOP_LEVEL_NAME:
        return
    if is_fastcall_supported(fn, emitter.capi_version):
        header = wrapper_function_header(fn, emitter.names)
    else:
        header = legacy_wrapper_function_header(fn, emitter.names)
    decls[PREFIX + fn.cname(emitter.names)] = HeaderDeclaration(f"{header};")
|
|
|
|
|
|
def pointerize(decl: str, name: str) -> str:
    """Given a C decl and its name, modify it to be a declaration to a pointer."""
    # This doesn't work in general but does work for all our types...
    # Function pointers need parens around the starred name; everything
    # else just gets a leading star.
    starred = f"(*{name})" if "(" in decl else f"*{name}"
    return decl.replace(name, starred)
|
|
|
|
|
|
def group_dir(group_name: str) -> str:
    """Given a group name, return the relative directory path for it."""
    # Drop the last dotted component; the rest maps to a directory path.
    parent, _, _ = group_name.rpartition(".")
    return parent.replace(".", os.sep)
|
|
|
|
|
|
class GroupGenerator:
|
|
    def __init__(
        self,
        modules: dict[str, ModuleIR],
        source_paths: dict[str, str],
        group_name: str | None,
        group_map: dict[str, str | None],
        names: NameGenerator,
        compiler_options: CompilerOptions,
    ) -> None:
        """Generator for C source for a compilation group.

        The code for a compilation group contains an internal and an
        external .h file, and then one .c if not in multi_file mode or
        one .c file per module if in multi_file mode.)

        Arguments:
            modules: (name, ir) pairs for each module in the group
            source_paths: Map from module names to source file paths
            group_name: The name of the group (or None if this is single-module compilation)
            group_map: A map of modules to their group names
            names: The name generator for the compilation
            compiler_options: Compilation options; the multi_file flag (whether
                to put each module in its own source file) is read from here.
        """
        self.modules = modules
        self.source_paths = source_paths
        self.context = EmitterContext(names, group_name, group_map)
        self.names = names
        # Initializations of globals to simple values that we can't
        # do statically because the windows loader is bad.
        self.simple_inits: list[tuple[str, str]] = []
        self.group_name = group_name
        # A named group implies the modules are compiled into a shared library.
        self.use_shared_lib = group_name is not None
        self.compiler_options = compiler_options
        self.multi_file = compiler_options.multi_file
|
|
|
|
@property
|
|
def group_suffix(self) -> str:
|
|
return "_" + exported_name(self.group_name) if self.group_name else ""
|
|
|
|
@property
|
|
def short_group_suffix(self) -> str:
|
|
return "_" + exported_name(self.group_name.split(".")[-1]) if self.group_name else ""
|
|
|
|
    def generate_c_for_modules(self) -> list[tuple[str, str]]:
        """Generate the C source files for this group.

        Returns (file name, file text) pairs: per-module .c files when in
        multi-file mode, then the group's main .c file, the internal .h
        file, and the external .h file.
        """
        file_contents = []
        multi_file = self.use_shared_lib and self.multi_file

        # Collect all literal refs in IR.
        for module in self.modules.values():
            for fn in module.functions:
                collect_literals(fn, self.context.literals)

        base_emitter = Emitter(self.context)
        # Optionally just include the runtime library c files to
        # reduce the number of compiler invocations needed
        if self.compiler_options.include_runtime_files:
            for name in RUNTIME_C_FILES:
                base_emitter.emit_line(f'#include "{name}"')
        base_emitter.emit_line(f'#include "__native{self.short_group_suffix}.h"')
        base_emitter.emit_line(f'#include "__native_internal{self.short_group_suffix}.h"')
        emitter = base_emitter

        self.generate_literal_tables()

        for module_name, module in self.modules.items():
            # In multi-file mode each module gets its own emitter (and .c file).
            if multi_file:
                emitter = Emitter(self.context)
                emitter.emit_line(f'#include "__native{self.short_group_suffix}.h"')
                emitter.emit_line(f'#include "__native_internal{self.short_group_suffix}.h"')

            self.declare_module(module_name, emitter)
            self.declare_internal_globals(module_name, emitter)
            self.declare_imports(module.imports, emitter)

            for cl in module.classes:
                if cl.is_ext_class:
                    generate_class(cl, module_name, emitter)

            # Generate Python extension module definitions and module initialization functions.
            self.generate_module_def(emitter, module_name, module)

            for fn in module.functions:
                emitter.emit_line()
                generate_native_function(fn, emitter, self.source_paths[module_name], module_name)
                # The top level gets no Python-callable wrapper.
                if fn.name != TOP_LEVEL_NAME:
                    emitter.emit_line()
                    if is_fastcall_supported(fn, emitter.capi_version):
                        generate_wrapper_function(
                            fn, emitter, self.source_paths[module_name], module_name
                        )
                    else:
                        generate_legacy_wrapper_function(
                            fn, emitter, self.source_paths[module_name], module_name
                        )
            if multi_file:
                name = f"__native_{emitter.names.private_name(module_name)}.c"
                file_contents.append((name, "".join(emitter.fragments)))

        # The external header file contains type declarations while
        # the internal contains declarations of functions and objects
        # (which are shared between shared libraries via dynamic
        # exports tables and not accessed directly.)
        ext_declarations = Emitter(self.context)
        ext_declarations.emit_line(f"#ifndef MYPYC_NATIVE{self.group_suffix}_H")
        ext_declarations.emit_line(f"#define MYPYC_NATIVE{self.group_suffix}_H")
        ext_declarations.emit_line("#include <Python.h>")
        ext_declarations.emit_line("#include <CPy.h>")

        declarations = Emitter(self.context)
        declarations.emit_line(f"#ifndef MYPYC_NATIVE_INTERNAL{self.group_suffix}_H")
        declarations.emit_line(f"#define MYPYC_NATIVE_INTERNAL{self.group_suffix}_H")
        declarations.emit_line("#include <Python.h>")
        declarations.emit_line("#include <CPy.h>")
        declarations.emit_line(f'#include "__native{self.short_group_suffix}.h"')
        declarations.emit_line()
        declarations.emit_line("int CPyGlobalsInit(void);")
        declarations.emit_line()

        for module_name, module in self.modules.items():
            self.declare_finals(module_name, module.final_names, declarations)
            for cl in module.classes:
                # NOTE(review): this passes `emitter` (the last module's
                # emitter in multi-file mode) for the code emitter — confirm
                # that is intended rather than base_emitter.
                generate_class_type_decl(cl, emitter, ext_declarations, declarations)
            for fn in module.functions:
                generate_function_declaration(fn, declarations)

        # Pull in the headers and export-table structs of dependency groups.
        for lib in sorted(self.context.group_deps):
            elib = exported_name(lib)
            short_lib = exported_name(lib.split(".")[-1])
            declarations.emit_lines(
                "#include <{}>".format(os.path.join(group_dir(lib), f"__native_{short_lib}.h")),
                f"struct export_table_{elib} exports_{elib};",
            )

        sorted_decls = self.toposort_declarations()

        emitter = base_emitter
        self.generate_globals_init(emitter)

        emitter.emit_line()

        # Route each declaration to the external header (types) or the
        # internal header (everything else, declared extern there).
        for declaration in sorted_decls:
            decls = ext_declarations if declaration.is_type else declarations
            if not declaration.is_type:
                decls.emit_lines(f"extern {declaration.decl[0]}", *declaration.decl[1:])
                # If there is a definition, emit it. Otherwise repeat the declaration
                # (without an extern).
                if declaration.defn:
                    emitter.emit_lines(*declaration.defn)
                else:
                    emitter.emit_lines(*declaration.decl)
            else:
                decls.emit_lines(*declaration.decl)

        if self.group_name:
            self.generate_export_table(ext_declarations, emitter)

            self.generate_shared_lib_init(emitter)

        ext_declarations.emit_line("#endif")
        declarations.emit_line("#endif")

        output_dir = group_dir(self.group_name) if self.group_name else ""
        return file_contents + [
            (
                os.path.join(output_dir, f"__native{self.short_group_suffix}.c"),
                "".join(emitter.fragments),
            ),
            (
                os.path.join(output_dir, f"__native_internal{self.short_group_suffix}.h"),
                "".join(declarations.fragments),
            ),
            (
                os.path.join(output_dir, f"__native{self.short_group_suffix}.h"),
                "".join(ext_declarations.fragments),
            ),
        ]
|
|
|
|
def generate_literal_tables(self) -> None:
|
|
"""Generate tables containing descriptions of Python literals to construct.
|
|
|
|
We will store the constructed literals in a single array that contains
|
|
literals of all types. This way we can refer to an arbitrary literal by
|
|
its index.
|
|
"""
|
|
literals = self.context.literals
|
|
# During module initialization we store all the constructed objects here
|
|
self.declare_global("PyObject *[%d]" % literals.num_literals(), "CPyStatics")
|
|
# Descriptions of str literals
|
|
init_str = c_string_array_initializer(literals.encoded_str_values())
|
|
self.declare_global("const char * const []", "CPyLit_Str", initializer=init_str)
|
|
# Descriptions of bytes literals
|
|
init_bytes = c_string_array_initializer(literals.encoded_bytes_values())
|
|
self.declare_global("const char * const []", "CPyLit_Bytes", initializer=init_bytes)
|
|
# Descriptions of int literals
|
|
init_int = c_string_array_initializer(literals.encoded_int_values())
|
|
self.declare_global("const char * const []", "CPyLit_Int", initializer=init_int)
|
|
# Descriptions of float literals
|
|
init_floats = c_array_initializer(literals.encoded_float_values())
|
|
self.declare_global("const double []", "CPyLit_Float", initializer=init_floats)
|
|
# Descriptions of complex literals
|
|
init_complex = c_array_initializer(literals.encoded_complex_values())
|
|
self.declare_global("const double []", "CPyLit_Complex", initializer=init_complex)
|
|
# Descriptions of tuple literals
|
|
init_tuple = c_array_initializer(literals.encoded_tuple_values())
|
|
self.declare_global("const int []", "CPyLit_Tuple", initializer=init_tuple)
|
|
# Descriptions of frozenset literals
|
|
init_frozenset = c_array_initializer(literals.encoded_frozenset_values())
|
|
self.declare_global("const int []", "CPyLit_FrozenSet", initializer=init_frozenset)
|
|
|
|
def generate_export_table(self, decl_emitter: Emitter, code_emitter: Emitter) -> None:
|
|
"""Generate the declaration and definition of the group's export struct.
|
|
|
|
To avoid needing to deal with deeply platform specific issues
|
|
involving dynamic library linking (and some possibly
|
|
insurmountable issues involving cyclic dependencies), compiled
|
|
code accesses functions and data in other compilation groups
|
|
via an explicit "export struct".
|
|
|
|
Each group declares a struct type that contains a pointer to
|
|
every function and static variable it exports. It then
|
|
populates this struct and stores a pointer to it in a capsule
|
|
stored as an attribute named 'exports' on the group's shared
|
|
library's python module.
|
|
|
|
On load, a group's init function will import all of its
|
|
dependencies' exports tables using the capsule mechanism and
|
|
copy the contents into a local copy of the table (to eliminate
|
|
the need for a pointer indirection when accessing it).
|
|
|
|
Then, all calls to functions in another group and accesses to statics
|
|
from another group are done indirectly via the export table.
|
|
|
|
For example, a group containing a module b, where b contains a class B
|
|
and a function bar, would declare an export table like:
|
|
struct export_table_b {
|
|
PyTypeObject **CPyType_B;
|
|
PyObject *(*CPyDef_B)(CPyTagged cpy_r_x);
|
|
CPyTagged (*CPyDef_B___foo)(PyObject *cpy_r_self, CPyTagged cpy_r_y);
|
|
tuple_T2OI (*CPyDef_bar)(PyObject *cpy_r_x);
|
|
char (*CPyDef___top_level__)(void);
|
|
};
|
|
that would be initialized with:
|
|
static struct export_table_b exports = {
|
|
&CPyType_B,
|
|
&CPyDef_B,
|
|
&CPyDef_B___foo,
|
|
&CPyDef_bar,
|
|
&CPyDef___top_level__,
|
|
};
|
|
To call `b.foo`, then, a function in another group would do
|
|
`exports_b.CPyDef_bar(...)`.
|
|
"""
|
|
|
|
decls = decl_emitter.context.declarations
|
|
|
|
decl_emitter.emit_lines("", f"struct export_table{self.group_suffix} {{")
|
|
for name, decl in decls.items():
|
|
if decl.needs_export:
|
|
decl_emitter.emit_line(pointerize("\n".join(decl.decl), name))
|
|
|
|
decl_emitter.emit_line("};")
|
|
|
|
code_emitter.emit_lines("", f"static struct export_table{self.group_suffix} exports = {{")
|
|
for name, decl in decls.items():
|
|
if decl.needs_export:
|
|
code_emitter.emit_line(f"&{name},")
|
|
|
|
code_emitter.emit_line("};")
|
|
|
|
    def generate_shared_lib_init(self, emitter: Emitter) -> None:
        """Generate the init function for a shared library.

        A shared library contains all of the actual code for a
        compilation group.

        The init function is responsible for creating Capsules that
        wrap pointers to the initialization function of all the real
        init functions for modules in this shared library as well as
        the export table containing all of the exported functions and
        values from all the modules.

        These capsules are stored in attributes of the shared library.
        """
        assert self.group_name is not None

        emitter.emit_line()
        # PyInit function + module def. A static `module` pointer makes the
        # init idempotent: repeated calls return the cached module with a
        # fresh reference.
        emitter.emit_lines(
            "PyMODINIT_FUNC PyInit_{}(void)".format(
                shared_lib_name(self.group_name).split(".")[-1]
            ),
            "{",
            (
                'static PyModuleDef def = {{ PyModuleDef_HEAD_INIT, "{}", NULL, -1, NULL, NULL }};'.format(
                    shared_lib_name(self.group_name)
                )
            ),
            "int res;",
            "PyObject *capsule;",
            "PyObject *tmp;",
            "static PyObject *module;",
            "if (module) {",
            "Py_INCREF(module);",
            "return module;",
            "}",
            "module = PyModule_Create(&def);",
            "if (!module) {",
            "goto fail;",
            "}",
            "",
        )

        # Publish the export table in a capsule attribute named "exports".
        emitter.emit_lines(
            'capsule = PyCapsule_New(&exports, "{}.exports", NULL);'.format(
                shared_lib_name(self.group_name)
            ),
            "if (!capsule) {",
            "goto fail;",
            "}",
            'res = PyObject_SetAttrString(module, "exports", capsule);',
            "Py_DECREF(capsule);",
            "if (res < 0) {",
            "goto fail;",
            "}",
            "",
        )

        # One "init_<module>" capsule per module in the group, wrapping that
        # module's real init function (the shim module calls through it).
        for mod in self.modules:
            name = exported_name(mod)
            emitter.emit_lines(
                f"extern PyObject *CPyInit_{name}(void);",
                'capsule = PyCapsule_New((void *)CPyInit_{}, "{}.init_{}", NULL);'.format(
                    name, shared_lib_name(self.group_name), name
                ),
                "if (!capsule) {",
                "goto fail;",
                "}",
                f'res = PyObject_SetAttrString(module, "init_{name}", capsule);',
                "Py_DECREF(capsule);",
                "if (res < 0) {",
                "goto fail;",
                "}",
                "",
            )

        # Import each dependency group's shared library and copy its export
        # table into our local exports_<group> struct.
        for group in sorted(self.context.group_deps):
            egroup = exported_name(group)
            emitter.emit_lines(
                'tmp = PyImport_ImportModule("{}"); if (!tmp) goto fail; Py_DECREF(tmp);'.format(
                    shared_lib_name(group)
                ),
                'struct export_table_{} *pexports_{} = PyCapsule_Import("{}.exports", 0);'.format(
                    egroup, egroup, shared_lib_name(group)
                ),
                f"if (!pexports_{egroup}) {{",
                "goto fail;",
                "}",
                "memcpy(&exports_{group}, pexports_{group}, sizeof(exports_{group}));".format(
                    group=egroup
                ),
                "",
            )

        emitter.emit_lines("return module;", "fail:", "Py_XDECREF(module);", "return NULL;", "}")
|
|
|
|
    def generate_globals_init(self, emitter: Emitter) -> None:
        """Emit the C function CPyGlobalsInit() for this group.

        The generated function is idempotent (guarded by a static flag). It
        runs CPy_Init(), applies the deferred simple global initializations
        collected in self.simple_inits, and initializes the statics array
        from the encoded literal tables (see generate_literal_tables).
        """
        emitter.emit_lines(
            "",
            "int CPyGlobalsInit(void)",
            "{",
            "static int is_initialized = 0;",
            "if (is_initialized) return 0;",
            "",
        )

        emitter.emit_line("CPy_Init();")
        # Globals that couldn't be initialized statically (see simple_inits).
        for symbol, fixup in self.simple_inits:
            emitter.emit_line(f"{symbol} = {fixup};")

        values = "CPyLit_Str, CPyLit_Bytes, CPyLit_Int, CPyLit_Float, CPyLit_Complex, CPyLit_Tuple, CPyLit_FrozenSet"
        emitter.emit_lines(
            f"if (CPyStatics_Initialize(CPyStatics, {values}) < 0) {{", "return -1;", "}"
        )

        emitter.emit_lines("is_initialized = 1;", "return 0;", "}")
|
|
|
|
def generate_module_def(self, emitter: Emitter, module_name: str, module: ModuleIR) -> None:
    """Emit the PyModuleDef struct for a module and the module init function."""
    # Emit module methods
    module_prefix = emitter.names.private_name(module_name)
    emitter.emit_line(f"static PyMethodDef {module_prefix}module_methods[] = {{")
    for fn in module.functions:
        # Methods are registered on their class's type object, and the
        # top-level function is called directly from the init function
        # below, so neither belongs in the module method table.
        if fn.class_name is not None or fn.name == TOP_LEVEL_NAME:
            continue
        name = short_id_from_name(fn.name, fn.decl.shortname, fn.line)
        if is_fastcall_supported(fn, emitter.capi_version):
            flag = "METH_FASTCALL"
        else:
            flag = "METH_VARARGS"
        emitter.emit_line(
            (
                '{{"{name}", (PyCFunction){prefix}{cname}, {flag} | METH_KEYWORDS, '
                "NULL /* docstring */}},"
            ).format(name=name, cname=fn.cname(emitter.names), prefix=PREFIX, flag=flag)
        )
    # Sentinel entry terminating the PyMethodDef array.
    emitter.emit_line("{NULL, NULL, 0, NULL}")
    emitter.emit_line("};")
    emitter.emit_line()

    # Emit module definition struct
    emitter.emit_lines(
        f"static struct PyModuleDef {module_prefix}module = {{",
        "PyModuleDef_HEAD_INIT,",
        f'"{module_name}",',
        "NULL, /* docstring */",
        "-1, /* size of per-interpreter state of the module,",
        " or -1 if the module keeps state in global variables. */",
        f"{module_prefix}module_methods",
        "};",
    )
    emitter.emit_line()
    # Emit module init function. If we are compiling just one module, this
    # will be the C API init function. If we are compiling 2+ modules, we
    # generate a shared library for the modules and shims that call into
    # the shared library, and in this case we use an internal module
    # initialized function that will be called by the shim.
    if not self.use_shared_lib:
        declaration = f"PyMODINIT_FUNC PyInit_{module_name}(void)"
    else:
        declaration = f"PyObject *CPyInit_{exported_name(module_name)}(void)"
    emitter.emit_lines(declaration, "{")
    emitter.emit_line("PyObject* modname = NULL;")
    # Store the module reference in a static and return it when necessary.
    # This is separate from the *global* reference to the module that will
    # be populated when it is imported by a compiled module. We want that
    # reference to only be populated when the module has been successfully
    # imported, whereas this we want to have to stop a circular import.
    module_static = self.module_internal_static_name(module_name, emitter)

    # If init already ran (e.g. re-entered via an import cycle), hand out
    # a new reference to the cached module instead of re-initializing.
    emitter.emit_lines(
        f"if ({module_static}) {{",
        f"Py_INCREF({module_static});",
        f"return {module_static};",
        "}",
    )

    emitter.emit_lines(
        f"{module_static} = PyModule_Create(&{module_prefix}module);",
        f"if (unlikely({module_static} == NULL))",
        " goto fail;",
    )
    # __name__ is passed to CPyType_FromTemplate below so generated
    # classes get the right module name.
    emitter.emit_line(
        f'modname = PyObject_GetAttrString((PyObject *){module_static}, "__name__");'
    )

    module_globals = emitter.static_name("globals", module_name)
    emitter.emit_lines(
        f"{module_globals} = PyModule_GetDict({module_static});",
        f"if (unlikely({module_globals} == NULL))",
        " goto fail;",
    )

    # HACK: Manually instantiate generated classes here
    type_structs: list[str] = []
    for cl in module.classes:
        type_struct = emitter.type_struct_name(cl)
        type_structs.append(type_struct)
        if cl.is_generated:
            emitter.emit_lines(
                "{t} = (PyTypeObject *)CPyType_FromTemplate("
                "(PyObject *){t}_template, NULL, modname);".format(t=type_struct)
            )
            emitter.emit_lines(f"if (unlikely(!{type_struct}))", " goto fail;")

    emitter.emit_lines("if (CPyGlobalsInit() < 0)", " goto fail;")

    # Run the module's top-level code.
    self.generate_top_level_call(module, emitter)

    emitter.emit_lines("Py_DECREF(modname);")

    emitter.emit_line(f"return {module_static};")
    # Error path: drop the cached module so a later import can retry.
    emitter.emit_lines("fail:", f"Py_CLEAR({module_static});", "Py_CLEAR(modname);")
    # Reset final-attribute statics back to the undefined value so a retry
    # of the import starts from a clean state.
    for name, typ in module.final_names:
        static_name = emitter.static_name(name, module_name)
        emitter.emit_dec_ref(static_name, typ, is_xdec=True)
        undef = emitter.c_undefined_value(typ)
        emitter.emit_line(f"{static_name} = {undef};")
    # the type objects returned from CPyType_FromTemplate are all new references
    # so we have to decref them
    for t in type_structs:
        emitter.emit_line(f"Py_CLEAR({t});")
    emitter.emit_line("return NULL;")
    emitter.emit_line("}")
|
|
|
|
def generate_top_level_call(self, module: ModuleIR, emitter: Emitter) -> None:
    """Generate the C call that runs a module's top-level code.

    Emits nothing if the module IR has no top-level function. The native
    top level returns 2 on error, which routes to the init function's
    fail label.
    """
    # The top-level function tends to be appended last, so search backwards.
    top_level = next(
        (fn for fn in reversed(module.functions) if fn.name == TOP_LEVEL_NAME), None
    )
    if top_level is not None:
        emitter.emit_lines(
            f"char result = {emitter.native_function_name(top_level.decl)}();",
            "if (result == 2)",
            " goto fail;",
        )
|
|
|
|
def toposort_declarations(self) -> list[HeaderDeclaration]:
    """Return the header declarations ordered so dependencies come first.

    C requires, for example, that a struct be declared before it is used
    by value, so declarations are emitted in dependency order. A simple
    depth-first traversal of the dependency graph produces a valid order
    in O(V + E).
    """
    marked: dict[str, MarkedDeclaration] = {
        key: MarkedDeclaration(decl, False)
        for key, decl in self.context.declarations.items()
    }
    ordered = []

    def visit(key: str) -> None:
        node = marked[key]
        if node.mark:
            return

        for dep in node.declaration.dependencies:
            visit(dep)

        ordered.append(node.declaration)
        node.mark = True

    for key in marked:
        visit(key)

    return ordered
|
|
|
|
def declare_global(
    self, type_spaced: str, name: str, *, initializer: str | None = None
) -> None:
    """Declare a C global variable, optionally with an initializer.

    type_spaced is the C type including a trailing space (e.g.
    "PyObject *"). For array types such as "int [4]" the variable name
    must be spliced in before the brackets, so the type is split at the
    first '['. A name that is already declared is left untouched.
    """
    if "[" in type_spaced:
        elem_type, brackets = type_spaced.split("[", 1)
        base = f"{elem_type}{name}[{brackets}"
    else:
        base = f"{type_spaced}{name}"

    defn = [f"{base} = {initializer};"] if initializer else None
    # First declaration wins; duplicates are ignored.
    if name not in self.context.declarations:
        self.context.declarations[name] = HeaderDeclaration(f"{base};", defn=defn)
|
|
|
|
def declare_internal_globals(self, module_name: str, emitter: Emitter) -> None:
    """Declare the PyObject * static holding a module's globals dict."""
    self.declare_global("PyObject *", emitter.static_name("globals", module_name))
|
|
|
|
def module_internal_static_name(self, module_name: str, emitter: Emitter) -> str:
    """Return the name of the static caching a module's init result.

    This is the internal per-module static (distinct from the public
    module static other compiled modules use once import has succeeded).
    """
    internal_name = module_name + "_internal"
    return emitter.static_name(internal_name, None, prefix=MODULE_PREFIX)
|
|
|
|
def declare_module(self, module_name: str, emitter: Emitter) -> None:
    """Declare the pair of module-object globals for a compiled module.

    Each compiled module gets an internal static, used by its init
    function to cache the result and stop infinite recursion in import
    cycles, plus a public static that other compiled modules use to refer
    to it once importing has succeeded. Modules outside this group are
    skipped.
    """
    if module_name not in self.modules:
        return

    internal_static_name = self.module_internal_static_name(module_name, emitter)
    self.declare_global("CPyModule *", internal_static_name, initializer="NULL")
    static_name = emitter.static_name(module_name, None, prefix=MODULE_PREFIX)
    self.declare_global("CPyModule *", static_name)
    # The public reference starts out as Py_None until the import finishes.
    self.simple_inits.append((static_name, "Py_None"))
|
|
|
|
def declare_imports(self, imps: Iterable[str], emitter: Emitter) -> None:
    """Declare module statics for every imported module name."""
    for imported in imps:
        self.declare_module(imported, emitter)
|
|
|
|
def declare_finals(
    self, module: str, final_names: Iterable[tuple[str, RType]], emitter: Emitter
) -> None:
    """Declare the exported global statics backing a module's final attributes."""
    for name, typ in final_names:
        static_name = emitter.static_name(name, module)
        declaration = HeaderDeclaration(
            f"{emitter.ctype_spaced(typ)}{static_name};",
            [self.final_definition(module, name, typ, emitter)],
            needs_export=True,
        )
        emitter.context.declarations[static_name] = declaration
|
|
|
|
def final_definition(self, module: str, name: str, typ: RType, emitter: Emitter) -> str:
    """Return the C definition (with initializer) for a final's static."""
    ctype = emitter.ctype_spaced(typ)
    target = emitter.static_name(name, module)
    # Undefined and error values are always the same, so the initial value
    # doubles as the "not yet assigned" marker.
    initial = emitter.c_initializer_undefined_value(typ)
    return f"{ctype}{target} = {initial};"
|
|
|
|
def declare_static_pyobject(self, identifier: str, emitter: Emitter) -> None:
    """Declare a PyObject * global static for the given identifier."""
    self.declare_global("PyObject *", emitter.static_name(identifier, None))
|
|
|
|
|
|
def sort_classes(classes: list[tuple[str, ClassIR]]) -> list[tuple[str, ClassIR]]:
    """Order (module name, class IR) pairs so bases and traits come first."""
    module_of = {ir: name for name, ir in classes}
    deps: dict[ClassIR, set[ClassIR]] = {}
    for _, ir in classes:
        # Each class depends on its base class (if any) and all its traits.
        edges = deps.setdefault(ir, set())
        if ir.base:
            edges.add(ir.base)
        edges.update(ir.traits)
    return [(module_of[ir], ir) for ir in toposort(deps)]
|
|
|
|
|
|
T = TypeVar("T")


def toposort(deps: dict[T, set[T]]) -> list[T]:
    """Topologically sort a dict from item to its dependencies.

    Dependencies appear before their dependents in the result.
    Runs in O(V + E). Assumes the graph is acyclic.
    """
    ordered: list[T] = []
    done: set[T] = set()

    def visit(node: T) -> None:
        if node in done:
            return

        for prereq in deps[node]:
            visit(prereq)

        ordered.append(node)
        done.add(node)

    for node in deps:
        visit(node)

    return ordered
|
|
|
|
|
|
def is_fastcall_supported(fn: FuncIR, capi_version: tuple[int, int]) -> bool:
    """Decide whether a function's wrapper can use the fastcall convention."""
    if fn.class_name is None:
        # Plain module-level functions always support METH_FASTCALL.
        return True
    if fn.name == "__call__":
        # We can use vectorcalls (PEP 590) when supported
        return use_vectorcall(capi_version)
    # TODO: Support fastcall for __init__.
    return fn.name != "__init__"
|
|
|
|
|
|
def collect_literals(fn: FuncIR, literals: Literals) -> None:
    """Record every Python literal object loaded by ops in fn.

    Collecting literals must happen only after we have the final IR, so
    literals that have been optimized away are not included.
    """
    all_ops = (op for block in fn.blocks for op in block.ops)
    for op in all_ops:
        if isinstance(op, LoadLiteral):
            literals.record_literal(op.value)
|
|
|
|
|
|
def c_string_array_initializer(components: list[bytes]) -> str:
    """Return a C array initializer listing each byte string, one per line."""
    body = "".join(
        " " + c_string_initializer(item) + ",\n" for item in components
    )
    return "{\n" + body + "}"
|