From cdb84fd4bd0ae86d3a84ad81f299a4116ceb2fae Mon Sep 17 00:00:00 2001 From: Melody Horn Date: Thu, 25 Mar 2021 17:24:53 -0600 Subject: catch up with rust version, mostly --- .gitignore | 138 ++++++++++++++ README.md | 10 + pyproject.toml | 6 + setup.cfg | 23 +++ setup.py | 2 + yapymake/__init__.py | 14 ++ yapymake/args.py | 116 ++++++++++++ yapymake/makefile/__init__.py | 363 +++++++++++++++++++++++++++++++++++++ yapymake/makefile/parse_util.py | 188 +++++++++++++++++++ yapymake/makefile/token.py | 106 +++++++++++ yapymake/util/__init__.py | 1 + yapymake/util/peekable_iterator.py | 30 +++ 12 files changed, 997 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 pyproject.toml create mode 100644 setup.cfg create mode 100644 setup.py create mode 100644 yapymake/__init__.py create mode 100644 yapymake/args.py create mode 100644 yapymake/makefile/__init__.py create mode 100644 yapymake/makefile/parse_util.py create mode 100644 yapymake/makefile/token.py create mode 100644 yapymake/util/__init__.py create mode 100644 yapymake/util/peekable_iterator.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a81c8ee --- /dev/null +++ b/.gitignore @@ -0,0 +1,138 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..8023543 --- /dev/null +++ b/README.md @@ -0,0 +1,10 @@ +# yapymake + +A (mostly) [POSIX-compatible](https://pubs.opengroup.org/onlinepubs/9699919799/utilities/make.html) make implemented in Python. 
def main():
    """Entry point for the ``yapymake`` console script.

    Parses the command line (and ``MAKEFLAGS``) via ``parse()``, builds a
    ``Makefile`` from the resulting arguments and reads every makefile the
    arguments name into it. Each makefile handle is closed once it has been
    read, since nothing else uses it afterwards.
    """
    import sys  # local import: only needed to recognise the shared stdin handle

    these_args = parse()
    file = Makefile(these_args)
    # TODO dump command line into MAKEFLAGS
    # TODO dump command line macros into environment
    # TODO handle SHELL
    for input_file in these_args.makefile:
        try:
            file.read(input_file)
        finally:
            # `-f -` hands us sys.stdin itself; closing that would affect the
            # whole process, so only close handles that were opened for us.
            if input_file is not sys.stdin:
                input_file.close()
mode 100644 index 0000000..f13f8e0 --- /dev/null +++ b/yapymake/args.py @@ -0,0 +1,116 @@ +import argparse +from dataclasses import dataclass +import io +import os +import sys +from typing import List, TextIO + +from . import DESCRIPTION, VERSION + +__all__ = [ + 'parse', + 'Args', +] + +parser = argparse.ArgumentParser( + description=f'{DESCRIPTION} - version {VERSION}', +) + +parser.add_argument('--environment-overrides', '-e', + action='store_true', + help='Cause environment variables, including those with null values, to override macro assignments ' + 'within makefiles.') +parser.add_argument('--makefile', '--file', '-f', + action='append', + type=argparse.FileType('r'), + help="Specify a different makefile (or '-' for standard input).") +parser.add_argument('--ignore-errors', '-i', + action='store_true', + help='Ignore error codes returned by invoked commands.') +parser.add_argument('--keep-going', '-k', + action='store_true', + help='Continue to update other targets that do not depend on the current target if a non-ignored ' + 'error occurs while executing the commands to bring a target up-to-date.') +parser.add_argument('--dry-run', '--just-print', '--recon', '-n', + action='store_true', + help="Write commands that would be executed on standard output, but do not execute them (but " + "execute lines starting with '+').") +parser.add_argument('--print-everything', '--print-data-base', '-p', + action='store_true', + help='Write to standard output the complete set of macro definitions and target descriptions.') +parser.add_argument('--question', '-q', + action='store_true', + help='Return a zero exit value if the target file is up-to-date; otherwise, return an exit value ' + 'of 1.') +parser.add_argument('--no-builtin-rules', '-r', + action='store_false', + dest='builtin_rules', + help='Clear the suffix list and do not use the built-in rules.') +parser.add_argument('--no-keep-going', '--stop', '-S', + action='store_false', + dest='keep_going', + help='Terminate 
@dataclass()
class Args:
    """Typed view of the options yapymake was invoked with.

    Every attribute is copied from the ``argparse.Namespace`` produced by the
    module-level parser, so the rest of the program can rely on named,
    annotated fields instead of a loosely-typed namespace.
    """
    environment_overrides: bool
    makefile: List[TextIO]  # already-open makefiles, in the order they should be read
    ignore_errors: bool
    keep_going: bool
    dry_run: bool
    print_everything: bool
    question: bool
    builtin_rules: bool
    silent: bool
    touch: bool
    targets_or_macros: List[str]

    def __init__(self, parsed_args: argparse.Namespace):
        """Copy the parsed options, falling back to the default makefile.

        Raises:
            FileNotFoundError: no makefile was given and neither ``./makefile``
                nor ``./Makefile`` exists.
        """
        self.environment_overrides = parsed_args.environment_overrides
        # An `action='append'` option that never appeared on the command line
        # is left as None by argparse (not an empty list), so test truthiness
        # rather than calling len() on it.
        if parsed_args.makefile:
            self.makefile = parsed_args.makefile
        else:
            # default lookup: ./makefile first, then ./Makefile
            try:
                self.makefile = [open('./makefile', 'r')]
            except FileNotFoundError:
                self.makefile = [open('./Makefile', 'r')]
        self.ignore_errors = parsed_args.ignore_errors
        self.keep_going = parsed_args.keep_going
        self.dry_run = parsed_args.dry_run
        self.print_everything = parsed_args.print_everything
        self.question = parsed_args.question
        self.builtin_rules = parsed_args.builtin_rules
        self.silent = parsed_args.silent
        self.touch = parsed_args.touch
        self.targets_or_macros = parsed_args.targets_or_macros
@dataclass()
class Makefile:
    """Everything learned from the makefiles read so far.

    Holds the inference rules, macro definitions and target rules collected by
    :meth:`read`, together with the parsed command-line arguments that affect
    how they are interpreted.
    """
    _inference_rules: List['InferenceRule']
    # macro name -> (where the definition came from, unexpanded value)
    _macros: Dict[str, Tuple['MacroSource', TokenString]]
    _targets: Dict[str, 'Target']
    args: Args

    # Special target names defined by POSIX. They look like single-suffix
    # inference rules (".NAME" with no prerequisites) but must be stored as
    # ordinary targets so special_target() can find them.
    _SPECIAL_TARGETS = frozenset([
        '.DEFAULT', '.IGNORE', '.POSIX', '.PRECIOUS', '.SCCS_GET', '.SILENT', '.SUFFIXES',
    ])

    def __init__(self, args: Args):
        """Start from the built-in rules (unless disabled) and the environment."""
        self._inference_rules = []
        self._macros = dict()
        self._targets = dict()
        self.args = args

        if args.builtin_rules:
            self._inference_rules += BUILTIN_INFERENCE_RULES
            self._macros.update(BUILTIN_MACROS)
            self._targets.update(BUILTIN_TARGETS)

        # every environment variable except MAKEFLAGS and SHELL becomes a macro
        for k, v in os.environ.items():
            if k not in ['MAKEFLAGS', 'SHELL']:
                self._macros[k] = (MacroSource.Environment, TokenString([TextToken(v)]))

    def read(self, file: TextIO):
        """Parse one makefile and merge its rules and macros into this object.

        ``include`` lines are followed recursively. Blank lines and lines that
        contain only a comment are skipped; any other line that is neither a
        rule nor a macro definition is ignored.
        """
        lines_iter: PeekableIterator[str] = PeekableIterator(iter(file))
        for line in lines_iter:
            # POSIX: outside command lines an escaped newline, together with
            # the leading white space of the following line, is replaced by a
            # single space. (Its effect on include lines is unspecified, so
            # they are treated the same way.)
            while line.endswith('\\\n'):
                line = line[:-2] + ' ' + next(lines_iter, '').lstrip()

            # Skip blank and comment-only lines up front; otherwise a comment
            # containing ':' or '=' would be parsed as a rule or a macro.
            stripped = line.strip()
            if not stripped or stripped.startswith('#'):
                continue

            if line.startswith('include '):
                self._read_include(line)
                # make sure an ambiguous line is not processed as both an
                # include and something else
                continue

            # Decide whether this is a macro definition or a rule: whichever
            # of ':' / '=' appears first in literal text wins.
            line_type = 'unknown'
            line_tokens = tokenize(line)
            for t in line_tokens:
                if isinstance(t, TextToken):
                    if ':' in t.text and ('=' not in t.text or t.text.index(':') < t.text.index('=')):
                        line_type = 'rule'
                        break
                    elif '=' in t.text and (':' not in t.text or t.text.index('=') < t.text.index(':')):
                        line_type = 'macro'
                        break

            if line_type == 'rule':
                self._read_rule(line_tokens, lines_iter)
            elif line_type == 'macro':
                self._read_macro(line_tokens)

    def _read_include(self, line: str):
        """Read every file named on an ``include`` line."""
        remainder = line[len('include '):].lstrip()
        # discard the trailing newline, any comment, and the blanks before it
        remainder = re.sub(r'(\s+#.*)?\n?$', '', remainder)
        expanded = self.expand_macros(tokenize(remainder))
        # POSIX leaves zero or several fields unspecified; like GNU make,
        # include each field separately.
        for included_file in expanded.split():
            with open(included_file, 'r') as included:
                self.read(included)

    @staticmethod
    def _next_is_command(lines_iter: 'PeekableIterator[str]') -> bool:
        """Return whether the next unread line is a tab-prefixed command line."""
        try:
            return lines_iter.peek().startswith('\t')
        except StopIteration:
            # end of file: nothing follows the rule
            return False

    def _read_rule(self, line_tokens: TokenString, lines_iter: 'PeekableIterator[str]'):
        """Record a target rule or inference rule, consuming its command lines."""
        targets, after_colon = line_tokens.split_once(':')
        targets = self.expand_macros(targets).split()
        # Text after a semicolon, and every following line that starts with a
        # tab, is a command line for this rule.
        semicolon_split = after_colon.split_once(';')
        if semicolon_split is None:
            prerequisites = self.expand_macros(after_colon).split()
            commands = []
        else:
            prerequisites, commands = semicolon_split
            prerequisites = self.expand_macros(prerequisites).split()
            commands = [commands]
        while self._next_is_command(lines_iter):
            commands.append(tokenize(next(lines_iter).lstrip('\t')))
        commands = [CommandLine(c) for c in commands]

        if not targets:
            # a rule needs at least one target; nothing to record
            return

        # An inference rule is a lone ".s2" or ".s1.s2" target without
        # prerequisites. s1 is the optional first suffix, so it is '' for a
        # single-suffix rule.
        match = None
        if len(targets) == 1 and len(prerequisites) == 0 and targets[0] not in self._SPECIAL_TARGETS:
            match = re.fullmatch(r'(?P<s1>(\.[^/.]+)?)(?P<s2>\.[^/.]+)', targets[0])
        if match is not None:
            self._inference_rules.append(InferenceRule(match.group('s1'), match.group('s2'), commands))
            return

        for target in targets:
            # A target with prerequisites but no commands adds to the
            # prerequisite list of an existing target.
            if target in self._targets and len(commands) == 0:
                self._targets[target].prerequisites += prerequisites
            else:
                # copy, so targets declared on one line do not share a list
                self._targets[target] = Target(target, list(prerequisites), commands)

    @staticmethod
    def _without_trailing_newline(tokens: TokenString) -> TokenString:
        """Return ``tokens`` with the line's final newline removed from its last text token."""
        items = list(tokens)
        if items and isinstance(items[-1], TextToken):
            items[-1] = TextToken(items[-1].text.rstrip('\n'))
        return TokenString(items)

    def _read_macro(self, line_tokens: TokenString):
        """Record a ``name = value`` macro definition, honouring source precedence."""
        name, value = line_tokens.split_once('=')
        # the value runs up to a comment character or the end of the line
        comment_split = value.split_once('#')
        if comment_split is not None:
            value, _ = comment_split
        # blanks immediately before or after the equals sign are ignored
        # (these TokenString methods strip in place)
        name.rstrip()
        value.lstrip()
        # the newline ends the definition; it is not part of the value
        value = self._without_trailing_newline(value)
        # macros in the name are expanded when the assignment is made
        name = self.expand_macros(name).strip()
        # Makefile definitions override earlier makefile definitions and
        # built-ins, and the environment unless -e was given, but never
        # command-line or MAKEFLAGS definitions.
        if name in self._macros:
            source, _ = self._macros[name]
            inviolate_sources = [MacroSource.CommandLine, MacroSource.MAKEFLAGS]
            if self.args.environment_overrides:
                inviolate_sources.append(MacroSource.Environment)
            # compare by identity: enum members are singletons
            if any(source is protected for protected in inviolate_sources):
                return
        self._macros[name] = (MacroSource.File, value)

    def expand_macros(self, text: TokenString, current_target: Optional['Target'] = None) -> str:
        """Expand every macro reference in ``text`` and return the resulting string.

        ``current_target`` supplies the values of the internal macros ``$@``,
        ``$?``, ``$<`` and ``$*`` (and their ``D``/``F`` variants). When it is
        omitted, as it is while reading a makefile, those macros expand to
        nothing. Macros that were never defined also expand to the empty
        string. A macro that refers to itself recurses without bound.
        """
        def expand_one(this_token: Token) -> str:
            if isinstance(this_token, TextToken):
                return this_token.text
            elif isinstance(this_token, MacroToken):
                macro_name = this_token.name
                internal_macro = len(macro_name) in [1, 2] and macro_name[0] in '@?<*' and \
                    macro_name[1:] in ['', 'D', 'F']
                if internal_macro:
                    macro_value = ' '.join(self._internal_macro_words(macro_name, current_target))
                elif macro_name in self._macros:
                    _, unexpanded = self._macros[macro_name]
                    macro_value = self.expand_macros(unexpanded, current_target)
                else:
                    macro_value = ''
                if this_token.replacement is not None:
                    replaced, replacement = (self.expand_macros(t, current_target) for t in this_token.replacement)
                    # subst1 is only recognised as a suffix at the end of a
                    # word; an empty subst1 matches the end of every word
                    pattern = ('' if replaced else r'(?<=\S)') + re.escape(replaced) + r'(?=\s|$)'
                    # a callable keeps backslashes in subst2 literal
                    macro_value = re.sub(pattern, lambda _match: replacement, macro_value)
                return macro_value
            raise TypeError(f'cannot expand token {this_token!r}')

        return ''.join(expand_one(t) for t in text)

    def _internal_macro_words(self, macro_name: str, current_target: Optional['Target']) -> List[str]:
        """Return the words an internal macro (``@ ? < *``, optional ``D``/``F``) stands for."""
        if current_target is None:
            return []
        if macro_name[0] == '@':
            # $@ is the full name of the current target
            words = [current_target.name]
        elif macro_name[0] == '?':
            # $? lists the prerequisites that are newer than the current target
            words = [p for p in current_target.prerequisites if self.target(p).newer_than(current_target)]
        elif macro_name[0] == '<':
            # $< is taken to be the target's prerequisites; for a target
            # created from an inference rule that is the single file that
            # selected the rule
            words = list(current_target.prerequisites)
        else:
            # $* is the current target name with its suffix deleted
            words = [str(PurePath(current_target.name).with_suffix(''))]

        if macro_name[1:] == 'D':
            words = [str(PurePath(x).parent) for x in words]
        elif macro_name[1:] == 'F':
            words = [str(PurePath(x).name) for x in words]
        return words

    def special_target(self, name: str) -> Optional['Target']:
        """Return the target rule called ``name``, or ``None`` if there is none."""
        return self._targets.get(name, None)

    def special_target_has_prereq(self, target: str, name: str) -> bool:
        """Return whether special target ``target`` applies to ``name``.

        It applies when the special target exists and either lists ``name`` as
        a prerequisite or has no prerequisites at all.
        """
        special = self.special_target(target)
        if special is None:
            return False
        return len(special.prerequisites) == 0 or name in special.prerequisites

    def target(self, name: str) -> 'Target':
        """Return the rule for ``name``, inferring and caching one if needed.

        When no explicit rule exists, the inference rules are tried in
        definition order, then ``.DEFAULT``, and finally an existing file
        counts as already up to date.

        Raises:
            KeyError: nothing describes how to make ``name`` and no such file exists.
        """
        if name not in self._targets:
            suffix = PurePath(name).suffix
            # Inference only applies to suffixes listed in .SUFFIXES;
            # single-suffix rules apply to targets with no suffix at all.
            if self.special_target_has_prereq('.SUFFIXES', suffix) or suffix == '':
                for rule in self._inference_rules:
                    if rule.s1 == '':
                        # single-suffix rule ".s2": builds `name` from `name.s2`
                        if suffix != '':
                            continue
                        source_suffix = rule.s2
                    else:
                        # double-suffix rule ".s1.s2": builds `stem.s2` from `stem.s1`
                        if rule.s2 != suffix:
                            continue
                        source_suffix = rule.s1
                    # the first rule whose prerequisite file exists wins
                    prerequisite_path = PurePath(name).with_suffix(source_suffix)
                    if ImpurePath(prerequisite_path).exists():
                        self._targets[name] = Target(name, [str(prerequisite_path)], rule.commands)
                        break
        if name not in self._targets:
            # inference did not produce a rule; is there a default?
            default = self.special_target('.DEFAULT')
            if default is not None:
                self._targets[name] = Target(name, [], default.commands)
            else:
                # no rule and no default: an existing file is already up to date
                if ImpurePath(name).exists():
                    self._targets[name] = Target(name, [], [], True)
        return self._targets[name]
class MacroSource(enum.Enum):
    """Where a macro definition came from.

    The origin decides which later definitions may replace it. The numeric
    values of the non-file members appear to follow the numbering of the
    macro sources in the POSIX make specification (1 command line,
    2 MAKEFLAGS, 3 environment, 4 built-in).

    This is deliberately a plain Enum and not a dataclass: ``@dataclass``
    would generate an ``__eq__`` that compares the (empty) field tuples, which
    makes every member compare equal to every other and makes the members
    unhashable.
    """
    File = 0
    CommandLine = 1
    MAKEFLAGS = 2
    Environment = 3
    Builtin = 4
+ silent = self.silent and not file.args.dry_run or \ + file.args.silent or \ + file.special_target_has_prereq('.SILENT', current_target.name) + + # > If the command prefix contains a , this indicates a makefile command line that shall be executed + # > even if -n, -q, or -t is specified. + should_execute = self.always_execute or not (file.args.dry_run or file.args.question or file.args.touch) + if not should_execute: + return + + execution_line = file.expand_macros(self.execution_line, current_target) + + # > Except as described under the at-sign prefix... + if not silent: + # > the execution line shall be written to the standard output. + print(execution_line) + + # > The execution line shall then be executed by a shell as if it were passed as the argument to the system() + # > interface, except that if errors are not being ignored then the shell -e option shall also be in effect. + # TODO figure out how to pass -e to the shell reliably + result = subprocess.call(execution_line, shell=True) + + # > By default, when make receives a non-zero status from the execution of a command, it shall terminate with + # > an error message to standard error. 
from typing import Callable, List, Optional, Tuple, TypeVar

__all__ = [
    'ParseResult',
    'Parser',
    'alt',
    'tag',
    'take_till1',
    'take_while1',
    'any_char',
    'all_consuming',
    'map_parser',
    'opt',
    'verify',
    'many1',
    'delimited',
    'pair',
    'preceded',
    'separated_pair',
]

# A small set of parser combinators modelled on the pieces of Rust's `nom`
# that the makefile tokenizer needs.
#
# A parser is a function from the remaining input to either None (no match)
# or a `(value, rest)` tuple, where `rest` is the input left after the match.
# Every combinator below takes parsers and returns a new parser.

T = TypeVar('T')
T1 = TypeVar('T1')
T2 = TypeVar('T2')

ParseResult = Optional[Tuple[T, str]]
Parser = Callable[[str], ParseResult[T]]


def alt(*parsers: Parser[T]) -> Parser[T]:
    """Try each parser in order and return the first success (None if none match)."""
    def parse(text: str) -> ParseResult[T]:
        for parser in parsers:
            result = parser(text)
            if result is not None:
                return result
        return None
    return parse


def tag(tag_text: str) -> Parser[None]:
    """Match the literal ``tag_text`` at the start of the input; the value is None."""
    def parse(text: str) -> ParseResult[None]:
        if text.startswith(tag_text):
            return None, text[len(tag_text):]
        return None
    return parse


def take_while1(predicate: Callable[[str], bool]) -> Parser[str]:
    """Match the longest non-empty prefix whose characters all satisfy ``predicate``."""
    def parse(text: str) -> ParseResult[str]:
        end = 0
        while end < len(text) and predicate(text[end]):
            end += 1
        if end == 0:
            # at least one character is required; this also covers empty
            # input, which repetition combinators hit at end of input
            return None
        return text[:end], text[end:]
    return parse


def take_till1(predicate: Callable[[str], bool]) -> Parser[str]:
    """Match the longest non-empty prefix before a character satisfying ``predicate``."""
    return take_while1(lambda x: not predicate(x))


def any_char(text: str) -> ParseResult[str]:
    """Match exactly one character, whatever it is."""
    if len(text) > 0:
        return text[0], text[1:]
    return None


def all_consuming(parser: Parser[T]) -> Parser[T]:
    """Succeed only if ``parser`` matches and leaves no input behind."""
    def parse(text: str) -> ParseResult[T]:
        result = parser(text)
        if result is None:
            return None
        value, extra = result
        if len(extra) > 0:
            return None
        # still a (value, rest) pair, like every other parser result
        return value, extra
    return parse


def map_parser(parser: Parser[T1], mapper: Callable[[T1], T2]) -> Parser[T2]:
    """Apply ``mapper`` to the value produced by ``parser``."""
    def parse(text: str) -> ParseResult[T2]:
        result = parser(text)
        if result is None:
            return None
        value, extra = result
        return mapper(value), extra
    return parse


def opt(parser: Parser[T]) -> Parser[Optional[T]]:
    """Make ``parser`` optional: on failure, succeed with None and consume nothing."""
    def parse(text: str) -> ParseResult[Optional[T]]:
        result = parser(text)
        if result is None:
            return None, text
        return result
    return parse


def verify(parser: Parser[T], predicate: Callable[[T], bool]) -> Parser[T]:
    """Accept the result of ``parser`` only when its value satisfies ``predicate``."""
    def parse(text: str) -> ParseResult[T]:
        result = parser(text)
        if result is None:
            return None
        value, extra = result
        if predicate(value):
            return value, extra
        return None
    return parse


def many1(parser: Parser[T]) -> Parser[List[T]]:
    """Apply ``parser`` one or more times and collect the values in a list."""
    def parse(text: str) -> ParseResult[List[T]]:
        first = parser(text)
        if first is None:
            return None
        value, extra = first
        values = [value]

        while True:
            following = parser(extra)
            if following is None:
                break
            value, remaining = following
            if len(remaining) == len(extra):
                # the parser matched without consuming anything; repeating it
                # would never terminate
                break
            values.append(value)
            extra = remaining
        return values, extra
    return parse


def delimited(before_parser: Parser[T1], parser: Parser[T], after_parser: Parser[T2]) -> Parser[T]:
    """Match ``before_parser``, ``parser``, ``after_parser`` in sequence; keep only the middle value."""
    def parse(text: str) -> ParseResult[T]:
        before_result = before_parser(text)
        if before_result is None:
            return None
        _, extra = before_result

        result = parser(extra)
        if result is None:
            return None
        value, extra = result

        after_result = after_parser(extra)
        if after_result is None:
            return None
        _, extra = after_result

        return value, extra
    return parse


def pair(first_parser: Parser[T1], second_parser: Parser[T2]) -> Parser[Tuple[T1, T2]]:
    """Match two parsers in sequence and return both values as a tuple."""
    def parse(text: str) -> ParseResult[Tuple[T1, T2]]:
        first_result = first_parser(text)
        if first_result is None:
            return None
        first_value, extra = first_result

        second_result = second_parser(extra)
        if second_result is None:
            return None
        second_value, extra = second_result

        return (first_value, second_value), extra
    return parse


def preceded(before_parser: Parser[T1], parser: Parser[T]) -> Parser[T]:
    """Match ``before_parser`` then ``parser``; keep only the second value."""
    def parse(text: str) -> ParseResult[T]:
        before_result = before_parser(text)
        if before_result is None:
            return None
        _, extra = before_result

        return parser(extra)
    return parse


def separated_pair(first_parser: Parser[T1], between_parser: Parser[T], second_parser: Parser[T2]) -> Parser[Tuple[T1, T2]]:
    """Match ``first``, ``between``, ``second`` in sequence; return the outer two values."""
    def parse(text: str) -> ParseResult[Tuple[T1, T2]]:
        first_result = first_parser(text)
        if first_result is None:
            return None
        first_value, extra = first_result

        between_result = between_parser(extra)
        if between_result is None:
            return None
        _, extra = between_result

        second_result = second_parser(extra)
        if second_result is None:
            return None
        second_value, extra = second_result

        return (first_value, second_value), extra
    return parse
def macro_expansion_body(end: str) -> Parser[MacroToken]:
    """Build a parser for the inside of ``$(...)`` or ``${...}``.

    The body is a macro name, optionally followed by a ``:subst1=subst2``
    suffix replacement. ``end`` is the closing delimiter, which terminates
    ``subst2``.

    The grammar is recursive (a replacement may itself contain macro
    expansions), and this function is called at import time, before
    ``tokens`` has been defined. The inner parsers are therefore only
    constructed when the returned parser actually runs.
    """
    def parse(text: str) -> ParseResult[MacroToken]:
        # the separator has to be a parser (tag), not the bare string '='
        subst = preceded(tag(':'), separated_pair(tokens('='), tag('='), tokens(end)))
        body = pair(macro_name, opt(subst))
        # pair() yields one (name, replacement) tuple; unpack it so it fills
        # both MacroToken fields rather than becoming the name
        return map_parser(body, lambda parts: MacroToken(*parts))(text)
    return parse
from typing import Iterator, Optional, TypeVar

T = TypeVar('T')


class PeekableIterator(Iterator[T]):
    """Iterator wrapper that can look at the next item without consuming it.

    A separate flag records whether an item has been peeked, so items that are
    themselves ``None`` are handled like any other value.
    """
    _inner: Iterator[T]
    _peeked: Optional[T]
    _has_peeked: bool

    # marks "no default supplied" for peek(); distinct from every real value
    _NO_DEFAULT = object()

    def __init__(self, inner: Iterator[T]):
        self._inner = inner
        self._peeked = None
        self._has_peeked = False

    def __iter__(self) -> Iterator[T]:
        return self

    def __next__(self) -> T:
        if self._has_peeked:
            # hand out the item peek() already pulled from the inner iterator
            result = self._peeked
            self._peeked = None
            self._has_peeked = False
            return result
        return next(self._inner)

    def peek(self, default=_NO_DEFAULT) -> T:
        """Return the next item without advancing the iterator.

        Repeated calls return the same item until it is consumed with
        ``next()``. If the iterator is exhausted, ``default`` is returned when
        one was given.

        Raises:
            StopIteration: the iterator is exhausted and no default was given.
        """
        if not self._has_peeked:
            try:
                self._peeked = next(self._inner)
            except StopIteration:
                if default is PeekableIterator._NO_DEFAULT:
                    raise
                return default
            self._has_peeked = True
        return self._peeked