Skip to content

Commit

Permalink
[mypyc] Make a bunch of the build process more configurable (python#7939)
Browse files Browse the repository at this point in the history
  • Loading branch information
msullivan authored Nov 12, 2019
1 parent 8d562e2 commit c6c99b9
Show file tree
Hide file tree
Showing 7 changed files with 93 additions and 68 deletions.
93 changes: 52 additions & 41 deletions mypyc/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,24 +18,25 @@
hackily decide based on whether setuptools has been imported already.
"""

import glob
import sys
import os.path
import hashlib
import time
import re

from typing import List, Tuple, Any, Optional, Dict, Union, Set, cast
from typing import List, Tuple, Any, Optional, Dict, Union, Set, Iterable, cast
from typing_extensions import TYPE_CHECKING, NoReturn, Type

from mypy.main import process_options
from mypy.errors import CompileError
from mypy.options import Options
from mypy.build import BuildSource
from mypy.fscache import FileSystemCache

from mypyc.namegen import exported_name
from mypyc.options import CompilerOptions
from mypyc.errors import Errors
from mypyc.common import BUILD_DIR, shared_lib_name
from mypyc.common import shared_lib_name
from mypyc.ops import format_modules

from mypyc import emitmodule
Expand Down Expand Up @@ -78,16 +79,21 @@ def fail(message: str) -> NoReturn:
sys.exit(message)


def get_mypy_config(paths: List[str],
mypy_options: Optional[List[str]],
compiler_options: CompilerOptions) -> Tuple[List[BuildSource], Options]:
def get_mypy_config(mypy_options: List[str],
only_compile_paths: Optional[Iterable[str]],
compiler_options: CompilerOptions,
fscache: Optional[FileSystemCache],
) -> Tuple[List[BuildSource], List[BuildSource], Options]:
"""Construct mypy BuildSources and Options from file and options lists"""
# It is kind of silly to do this but oh well
mypy_options = mypy_options or []
mypy_options.append('--')
mypy_options.extend(paths)
all_sources, options = process_options(mypy_options, fscache=fscache)
if only_compile_paths:
paths_set = set(only_compile_paths)
mypyc_sources = [s for s in all_sources if s.path in paths_set]
else:
mypyc_sources = all_sources

sources, options = process_options(mypy_options)
if not mypyc_sources:
return mypyc_sources, all_sources, options

# Override whatever python_version is inferred from the .ini file,
# and set the python_version to be the currently used version.
Expand All @@ -104,10 +110,10 @@ def get_mypy_config(paths: List[str],
options.incremental = compiler_options.separate
options.preserve_asts = True

for source in sources:
for source in mypyc_sources:
options.per_module_options.setdefault(source.module, {})['mypyc'] = True

return sources, options
return mypyc_sources, all_sources, options


shim_template = """\
Expand Down Expand Up @@ -170,6 +176,7 @@ def include_dir() -> str:
def generate_c(sources: List[BuildSource],
options: Options,
groups: emitmodule.Groups,
fscache: FileSystemCache,
compiler_options: Optional[CompilerOptions] = None
) -> Tuple[List[List[Tuple[str, str]]], str]:
"""Drive the actual core compilation step.
Expand All @@ -185,7 +192,8 @@ def generate_c(sources: List[BuildSource],
# Do the actual work now
t0 = time.time()
try:
result = emitmodule.parse_and_typecheck(sources, options, groups)
result = emitmodule.parse_and_typecheck(
sources, options, compiler_options, groups, fscache)
except CompileError as e:
for line in e.messages:
print(line)
Expand All @@ -195,10 +203,6 @@ def generate_c(sources: List[BuildSource],
if compiler_options.verbose:
print("Parsed and typechecked in {:.3f}s".format(t1 - t0))

all_module_names = []
for group_sources, _ in groups:
all_module_names.extend([source.module for source in group_sources])

errors = Errors()

modules, ctext = emitmodule.compile_modules_to_c(result,
Expand Down Expand Up @@ -293,6 +297,7 @@ def write_file(path: str, contents: str) -> None:
except IOError:
old_contents = None
if old_contents != encoded_contents:
os.makedirs(os.path.dirname(path), exist_ok=True)
with open(path, 'wb') as f:
f.write(encoded_contents)

Expand Down Expand Up @@ -363,24 +368,29 @@ def get_header_deps(cfiles: List[Tuple[str, str]]) -> List[str]:

def mypycify(
paths: List[str],
mypy_options: Optional[List[str]] = None,
*,
only_compile_paths: Optional[Iterable[str]] = None,
verbose: bool = False,
opt_level: str = '3',
strip_asserts: bool = False,
multi_file: bool = False,
separate: Union[bool, List[Tuple[List[str], Optional[str]]]] = False,
skip_cgen_input: Optional[Any] = None
skip_cgen_input: Optional[Any] = None,
target_dir: Optional[str] = None,
include_runtime_files: Optional[bool] = None
) -> List['Extension']:
"""Main entry point to building using mypyc.
This produces a list of Extension objects that should be passed as the
ext_modules parameter to setup.
Arguments:
paths: A list of file paths to build. It may contain globs.
mypy_options: Optionally, a list of command line flags to pass to mypy.
(This can also contain additional files, for compatibility reasons.)
paths: A list of file paths to build. It may also contain mypy options.
only_compile_paths: If not None, an iterable of paths that are to be
the only modules compiled, even if other modules
appear in the mypy command line given to paths.
(These modules must still be passed to paths.)
verbose: Should mypyc be more verbose. Defaults to false.
opt_level: The optimization level, as a string. Defaults to '3' (meaning '-O3').
Expand All @@ -401,6 +411,11 @@ def mypycify(
speed up compilation, but calls between groups can
be slower than calls within a group and can't be
inlined.
target_dir: The directory to write C output files. Defaults to 'build'.
include_runtime_files: If not None, whether the mypyc runtime library
should be directly #include'd instead of linked
separately in order to reduce compiler invocations.
Defaults to False in multi_file mode, True otherwise.
"""

setup_mypycify_vars()
Expand All @@ -409,6 +424,8 @@ def mypycify(
multi_file=multi_file,
verbose=verbose,
separate=separate is not False,
target_dir=target_dir,
include_runtime_files=include_runtime_files,
)

# Create a compiler object so we can make decisions based on what
Expand All @@ -417,32 +434,25 @@ def mypycify(
compiler = ccompiler.new_compiler() # type: Any
sysconfig.customize_compiler(compiler)

expanded_paths = []
for path in paths:
expanded_paths.extend(glob.glob(path))

build_dir = BUILD_DIR # TODO: can this be overridden??
try:
os.mkdir(build_dir)
except FileExistsError:
pass
build_dir = compiler_options.target_dir

sources, options = get_mypy_config(expanded_paths, mypy_options, compiler_options)
fscache = FileSystemCache()
mypyc_sources, all_sources, options = get_mypy_config(
paths, only_compile_paths, compiler_options, fscache)
# We generate a shared lib if there are multiple modules or if any
# of the modules are in package. (Because I didn't want to fuss
# around with making the single module code handle packages.)
use_shared_lib = len(sources) > 1 or any('.' in x.module for x in sources)
use_shared_lib = len(mypyc_sources) > 1 or any('.' in x.module for x in mypyc_sources)

groups = construct_groups(sources, separate, use_shared_lib)
groups = construct_groups(mypyc_sources, separate, use_shared_lib)

# We let the test harness just pass in the c file contents instead
# so that it can do a corner-cutting version without full stubs.
if not skip_cgen_input:
group_cfiles, ops_text = generate_c(sources, options, groups,
group_cfiles, ops_text = generate_c(all_sources, options, groups, fscache,
compiler_options=compiler_options)
# TODO: unique names?
with open(os.path.join(build_dir, 'ops.txt'), 'w') as f:
f.write(ops_text)
write_file(os.path.join(build_dir, 'ops.txt'), ops_text)
else:
group_cfiles = skip_cgen_input

Expand Down Expand Up @@ -487,10 +497,11 @@ def mypycify(
'/wd9025', # warning about overriding /GL
]

# In multi-file mode, copy the runtime library in.
# Otherwise it just gets #included to save on compiler invocations
# If configured to (defaults to yes in multi-file mode), copy the
# runtime library in. Otherwise it just gets #included to save on
# compiler invocations.
shared_cfilenames = []
if multi_file:
if not compiler_options.include_runtime_files:
for name in ['CPy.c', 'getargs.c']:
rt_file = os.path.join(build_dir, name)
with open(os.path.join(include_dir(), name), encoding='utf-8') as f:
Expand Down
2 changes: 0 additions & 2 deletions mypyc/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
if MYPY:
from typing_extensions import Final

BUILD_DIR = 'build'

PREFIX = 'CPyPy_' # type: Final # Python wrappers
NATIVE_PREFIX = 'CPyDef_' # type: Final # Native functions etc.
DUNDER_PREFIX = 'CPyDunder_' # type: Final # Wrappers for exposing dunder methods to the API
Expand Down
47 changes: 27 additions & 20 deletions mypyc/emitmodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,11 @@
from mypy.errors import CompileError
from mypy.options import Options
from mypy.plugin import Plugin, ReportConfigContext
from mypy.fscache import FileSystemCache

from mypyc import genops
from mypyc.common import (
BUILD_DIR, PREFIX, TOP_LEVEL_NAME, INT_PREFIX, MODULE_PREFIX, shared_lib_name,
PREFIX, TOP_LEVEL_NAME, INT_PREFIX, MODULE_PREFIX, shared_lib_name,
)
from mypyc.emit import EmitterContext, Emitter, HeaderDeclaration
from mypyc.emitfunc import generate_native_function, native_function_header
Expand Down Expand Up @@ -86,14 +87,16 @@ class MypycPlugin(Plugin):
recompile the module so we mark it as stale.
"""

def __init__(self, options: Options, groups: Groups) -> None:
def __init__(
self, options: Options, compiler_options: CompilerOptions, groups: Groups) -> None:
super().__init__(options)
self.group_map = {} # type: Dict[str, Tuple[Optional[str], List[str]]]
for sources, name in groups:
modules = sorted(source.module for source in sources)
for id in modules:
self.group_map[id] = (name, modules)

self.compiler_options = compiler_options
self.metastore = create_metastore(options)

def report_config_data(
Expand Down Expand Up @@ -136,7 +139,7 @@ def report_config_data(
# .mypy_cache, which we should handle gracefully.
for path, hash in ir_data['src_hashes'].items():
try:
with open(os.path.join(BUILD_DIR, path), 'rb') as f:
with open(os.path.join(self.compiler_options.target_dir, path), 'rb') as f:
contents = f.read()
except FileNotFoundError:
return None
Expand All @@ -151,14 +154,20 @@ def get_additional_deps(self, file: MypyFile) -> List[Tuple[int, str, int]]:
return [(10, id, -1) for id in self.group_map.get(file.fullname(), (None, []))[1]]


def parse_and_typecheck(sources: List[BuildSource], options: Options,
groups: Groups,
alt_lib_path: Optional[str] = None) -> BuildResult:
def parse_and_typecheck(
sources: List[BuildSource],
options: Options,
compiler_options: CompilerOptions,
groups: Groups,
fscache: Optional[FileSystemCache] = None,
alt_lib_path: Optional[str] = None
) -> BuildResult:
assert options.strict_optional, 'strict_optional must be turned on'
result = build(sources=sources,
options=options,
alt_lib_path=alt_lib_path,
extra_plugins=[MypycPlugin(options, groups)])
fscache=fscache,
extra_plugins=[MypycPlugin(options, compiler_options, groups)])
if result.errors:
raise CompileError(result.errors)
return result
Expand Down Expand Up @@ -273,8 +282,9 @@ def compile_ir_to_c(
continue
literals = mapper.literals[group_name]
generator = GroupGenerator(
literals, group_modules, source_paths, group_name, mapper.group_map, names,
compiler_options.multi_file
literals, group_modules, source_paths,
group_name, mapper.group_map, names,
compiler_options
)
ctext[group_name] = generator.generate_c_for_modules()

Expand Down Expand Up @@ -406,16 +416,13 @@ def generate_function_declaration(fn: FuncIR, emitter: Emitter) -> None:

def encode_as_c_string(s: str) -> Tuple[str, int]:
"""Produce a utf-8 encoded, escaped, quoted C string and its size from a string"""
# This is a kind of abusive way to do this...
b = s.encode('utf-8')
escaped = str(b)[2:-1].replace('"', '\\"')
return '"{}"'.format(escaped), len(b)
return encode_bytes_as_c_string(s.encode('utf-8'))


def encode_bytes_as_c_string(b: bytes) -> Tuple[str, int]:
"""Produce a single-escaped, quoted C string and its size from a bytes"""
# This is a kind of abusive way to do this...
escaped = str(b)[2:-1].replace('"', '\\"')
escaped = repr(b)[2:-1].replace('"', '\\"')
return '"{}"'.format(escaped), len(b)


Expand All @@ -438,7 +445,7 @@ def __init__(self,
group_name: Optional[str],
group_map: Dict[str, Optional[str]],
names: NameGenerator,
multi_file: bool) -> None:
compiler_options: CompilerOptions) -> None:
"""Generator for C source for a compilation group.
The code for a compilation group contains an internal and an
Expand All @@ -465,7 +472,8 @@ def __init__(self,
self.simple_inits = [] # type: List[Tuple[str, str]]
self.group_name = group_name
self.use_shared_lib = group_name is not None
self.multi_file = multi_file
self.compiler_options = compiler_options
self.multi_file = compiler_options.multi_file

@property
def group_suffix(self) -> str:
Expand All @@ -476,10 +484,9 @@ def generate_c_for_modules(self) -> List[Tuple[str, str]]:
multi_file = self.use_shared_lib and self.multi_file

base_emitter = Emitter(self.context)
# When not in multi-file mode we just include the runtime
# library c files to reduce the number of compiler invocations
# needed
if not self.multi_file:
# Optionally just include the runtime library c files to
# reduce the number of compiler invocations needed
if self.compiler_options.include_runtime_files:
base_emitter.emit_line('#include "CPy.c"')
base_emitter.emit_line('#include "getargs.c"')
base_emitter.emit_line('#include "__native{}.h"'.format(self.group_suffix))
Expand Down
11 changes: 10 additions & 1 deletion mypyc/options.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
from typing import Optional


class CompilerOptions:
def __init__(self, strip_asserts: bool = False, multi_file: bool = False,
verbose: bool = False, separate: bool = False) -> None:
verbose: bool = False, separate: bool = False,
target_dir: Optional[str] = None,
include_runtime_files: Optional[bool] = None) -> None:
self.strip_asserts = strip_asserts
self.multi_file = multi_file
self.verbose = verbose
self.separate = separate
self.global_opts = not separate
self.target_dir = target_dir or 'build'
self.include_runtime_files = (
include_runtime_files if include_runtime_files is not None else not multi_file
)
3 changes: 2 additions & 1 deletion mypyc/test/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,13 +202,14 @@ def run_case_step(self, testcase: DataDrivenTestCase, incremental_step: int) ->
groups = construct_groups(sources, separate, len(module_names) > 1)

try:
compiler_options = CompilerOptions(multi_file=self.multi_file, separate=self.separate)
result = emitmodule.parse_and_typecheck(
sources=sources,
options=options,
compiler_options=compiler_options,
groups=groups,
alt_lib_path='.')
errors = Errors()
compiler_options = CompilerOptions(multi_file=self.multi_file, separate=self.separate)
ir, cfiles = emitmodule.compile_modules_to_c(
result,
compiler_options=compiler_options,
Expand Down
Loading

0 comments on commit c6c99b9

Please sign in to comment.