diff --git a/atom.mk b/atom.mk index 4bf612c..cbc87fa 100644 --- a/atom.mk +++ b/atom.mk @@ -14,6 +14,10 @@ LOCAL_COPY_FILES := \ ctypeslib/codegen/__init__.py:usr/lib/python/site-packages/ctypeslib/codegen/ \ ctypeslib/codegen/typedesc.py:usr/lib/python/site-packages/ctypeslib/codegen/ \ ctypeslib/codegen/typehandler.py:usr/lib/python/site-packages/ctypeslib/codegen/ \ + ctypeslib/codegen/preprocess.py:usr/lib/python/site-packages/ctypeslib/codegen/ \ + ctypeslib/codegen/cache.py:usr/lib/python/site-packages/ctypeslib/codegen/ \ + ctypeslib/codegen/cindex.py:usr/lib/python/site-packages/ctypeslib/codegen/ \ + ctypeslib/codegen/hash.py:usr/lib/python/site-packages/ctypeslib/codegen/ \ ctypeslib/codegen/util.py:usr/lib/python/site-packages/ctypeslib/codegen/ \ ctypeslib/data/fundamental_type_name.tpl:usr/lib/python/site-packages/ctypeslib/data/ \ ctypeslib/data/headers.tpl:usr/lib/python/site-packages/ctypeslib/data/ \ @@ -41,6 +45,10 @@ define pybinding-macro PRIVATE_SO_FILES = $(shell echo "$4" | sed "s#:# #g") +PRIVATE_PYBINDING_BUNDLE = $$(shell echo $5) +PRIVATE_C_INCLUDES := $$(call module-get-listed-export,host.libclang,C_INCLUDES) +PRIVATE_C_INCLUDES := $$(PRIVATE_C_INCLUDES:%=-I%) +$(call local-get-build-dir)/$1.py: PRIVATE_C_INCLUDES := $$(PRIVATE_C_INCLUDES) $(call local-get-build-dir)/$1.py: PRIVATE_SO_FILES := $$(PRIVATE_SO_FILES) $(call local-get-build-dir)/$1.py: PRIVATE_SRC_FILES = \ $$(foreach header, $$(shell echo "$3" | sed "s#:# #g"), \ @@ -52,6 +60,7 @@ $(call local-get-build-dir)/$1.py: PRIVATE_OBJECT_FLAGS := $$(foreach lib, $$(sh $(call local-get-build-dir)/$1.py: $(shell echo "$4" | sed "s#:# #g") @echo "$$(PRIVATE_MODULE): Generating $1 python binding" @echo "Private object flags: $$(PRIVATE_OBJECT_FLAGS)" + @echo "Private includes: $$(PRIVATE_C_INCLUDES)" @echo "Private so files: $$(PRIVATE_SO_FILES)" $(Q) PYTHONPATH=$(HOST_OUT_STAGING)/usr/lib/python/site-packages \ @@ -68,12 +77,16 @@ $(call local-get-build-dir)/$1.py: $(shell echo "$4" 
| sed "s#:# #g") $$$$(sed -n -e 's/TARGET_GLOBAL_C_INCLUDES :=//p' $$(PRIVATE_OBJECT_FLAGS) | tr ' ' '\n' | sed -E 's/^(.+)/-I\1/') \ $$$$(sed -n -e 's/PRIVATE_GLOBAL_CFLAGS :=//p' $$(PRIVATE_OBJECT_FLAGS)) \ $$$$(sed -n -e 's/PRIVATE_CFLAGS :=//p' $$(PRIVATE_OBJECT_FLAGS)) \ + $$(PRIVATE_C_INCLUDES) \ + -D__PYBINDING_MACRO__=1 \ -fno-unsigned-char \ " LOCAL_CLEAN_FILES += $(call local-get-build-dir)/$1.py -LOCAL_COPY_FILES += $(call local-get-build-dir)/$1.py:usr/lib/python/site-packages/ -LOCAL_DEPENDS_HOST_MODULES += host.pybinding + +LOCAL_COPY_FILES += $(call local-get-build-dir)/$1.py:usr/lib/python/site-packages/$1/__init__.py + +LOCAL_DEPENDS_HOST_MODULES += host.pybinding host.libclang LOCAL_DEPENDS_MODULES := python LOCAL_LIBRARIES += $(shell echo "$2" | sed "s#:# #g") @@ -81,3 +94,16 @@ endef # Register the macro in alchemy $(call local-register-custom-macro,pybinding-macro) + +include $(CLEAR_VARS) + +LOCAL_MODULE := tst-ctypeslib-basic-types + +LOCAL_C_INCLUDES := $(LOCAL_PATH)/tests $(LOCAL_PATH)/src +LOCAL_CFLAGS := -Wall -Wextra -Werror -std=c99 -pedantic -fpic + +LOCAL_SRC_FILES := test/data/test-basic-types.c + +LOCAL_LIBRARIES := host.pybinding + +include $(BUILD_LIBRARY) diff --git a/ctypeslib/__init__.py b/ctypeslib/__init__.py index 9dcc09b..6c26fb5 100644 --- a/ctypeslib/__init__.py +++ b/ctypeslib/__init__.py @@ -22,13 +22,14 @@ # configure python-clang to use the local clang library try: from ctypes.util import find_library - from clang import cindex + from ctypeslib.codegen import cindex # debug for python-haystack travis-ci v1 = ["clang-%d" % _ for _ in range(14, 6, -1)] v2 = ["clang-%f" % _ for _ in range(6, 3, -1)] v_list = v1 + v2 + ["clang-3.9", "clang-3.8", "clang-3.7"] for version in ["libclang", "clang"] + v_list: if find_library(version) is not None: + from ctypeslib.codegen import cindex cindex.Config.set_library_file(find_library(version)) break else: @@ -40,6 +41,7 @@ cindex.Config.set_library_file(f) def clang_version(): + 
"""Opt-in memoization helpers for the ctypeslib code generator.

Only functions whose fully-qualified dotted name appears in
``_cache_functions`` are actually memoized; every other decorated function
is returned unchanged, so the decorators are free for non-hot paths.
Generator results are flattened to tuples before caching so the cached
value can be re-iterated.
"""

import functools
import itertools
import types

from ctypeslib.codegen.hash import hashable_dict


# Concrete class of itertools.tee iterators (not importable by name).
_Tee = itertools.tee([], 1)[0].__class__


# Global kill-switch: when True, @cached becomes a no-op for every function.
disable_cache = False


def _get_function_fullname(function):
    """Return the dotted ``module.qualname`` identifier of *function*."""
    return f"{function.__module__}.{function.__qualname__}"


# Allow-list of functions that measurably benefit from caching.
_cache_functions = {
    "ctypeslib.codegen.cindex.Config.lib",
    "ctypeslib.codegen.cindex.Cursor.get_tokens",
    "ctypeslib.codegen.cindex.SourceLocation.__contains__",
    "ctypeslib.codegen.cindex.Token.cursor",
    # # The following aren't worth caching
    # "ctypeslib.codegen.cindex.Cursor.kind",
    # "ctypeslib.codegen.cindex.Token.kind",
    # "ctypeslib.codegen.cindex.TokenGroup.get_tokens",
    # "ctypeslib.codegen.cindex.TranslationUnit.from_source",
    # "ctypeslib.codegen.cursorhandler.CursorHandler.MACRO_DEFINITION",
}


def cached(cache_key=None):
    """Memoize a function listed in ``_cache_functions``.

    cache_key -- ``None`` to key on a hash of all (bound) arguments;
    an ``int`` or ``slice`` to key on the selected positional argument
    value(s); or a callable receiving the bound-arguments mapping and
    returning the key.

    Returns the decorator.  Functions not on the allow-list (or when
    ``disable_cache`` is set at decoration time) are returned untouched.
    """
    def decorator(function):
        # Read-only access to module globals; no `global` statement needed.
        if disable_cache or _get_function_fullname(function) not in _cache_functions:
            return function

        cache = {}
        # Positional parameter names, used to bind *args into a mapping.
        args_names = function.__code__.co_varnames[: function.__code__.co_argcount]

        def wrapper(*args, **kwds):
            wargs = dict(zip(args_names, args))
            wargs.update(kwds)
            wargs = hashable_dict(wargs)
            if cache_key is None:
                key = hash(wargs)
            elif isinstance(cache_key, (int, slice)):
                key = tuple(wargs.values())[cache_key]
            else:
                key = cache_key(wargs)
            try:
                return cache[key]
            except KeyError:
                value = function(*args, **kwds)
                if isinstance(value, types.GeneratorType):
                    # Flatten so the cached result can be iterated repeatedly.
                    value = tuple(value)
                cache[key] = value
                return value

        return functools.update_wrapper(wrapper, function)

    return decorator


def cached_pure_method():
    """Cache a method ignoring ``self`` (key = every argument but the first)."""
    return cached(cache_key=slice(1, None))


def cached_property():
    """Like ``property`` with the getter memoized through ``cached``."""
    def decorator(function):
        return property(cached()(function))
    return decorator


def cached_classmethod():
    """Like ``classmethod`` with the underlying function memoized."""
    def decorator(function):
        return classmethod(cached()(function))
    return decorator


def cached_staticmethod():
    """Like ``staticmethod`` with the underlying function memoized."""
    def decorator(function):
        return staticmethod(cached()(function))
    return decorator
"""Augmented python-clang API: cache-friendly types and missing libclang bindings.

Each subclass below adds a stable ``__hash__`` to its ``clang.cindex``
counterpart (so cursors/tokens/types can be used as cache keys) and
memoizes the attribute that backs the hash.  The ``clang.cindex`` module
attributes are then monkey-patched so that values returned by libclang are
instances of the augmented classes.
"""

import collections.abc as collections_abc  # noqa: F401  (kept for API parity)
import ctypes
import os
import re
from ctypes import byref, c_int  # noqa: F401

import packaging.version
from clang import cindex
from clang.cindex import *  # noqa

from ctypeslib.codegen.cache import (
    cached,
    cached_classmethod,  # noqa: F401
    cached_property,
    cached_staticmethod,  # noqa: F401
)
from ctypeslib.codegen.hash import hash_combine, hash_value


class SourceLocation(cindex.SourceLocation):
    """
    A SourceLocation represents a particular location within a source file.
    """

    def __init__(self, *args, **kwds):
        super().__init__(*args, **kwds)
        self._cached_instantiation = None

    def __hash__(self):
        return hash(self._get_instantiation())

    _super_get_instantiation = cindex.SourceLocation._get_instantiation

    # _get_instantiation cannot go through @cached since it backs __hash__;
    # memoize it by hand on the instance instead.
    def _get_instantiation(self):
        if getattr(self, "_cached_instantiation", None) is None:
            self._cached_instantiation = self._super_get_instantiation()
        return self._cached_instantiation


cindex.SourceLocation = SourceLocation


class SourceRange(cindex.SourceRange):
    """
    A SourceRange describes a range of source locations within the source
    code.
    """

    def __init__(self, *args, **kwds):
        super().__init__(*args, **kwds)
        self._cached_start = None
        self._cached_end = None

    def __hash__(self):
        return hash_combine((self.start, self.end))

    # start/end cannot go through @cached since they back __hash__.
    _super_start = cindex.SourceRange.start

    @property
    def start(self):
        """
        Return a SourceLocation representing the first character within a
        source range.
        """
        if getattr(self, "_cached_start", None) is None:
            self._cached_start = self._super_start
        return self._cached_start

    _super_end = cindex.SourceRange.end

    @property
    def end(self):
        """
        Return a SourceLocation representing the last character within a
        source range.
        """
        if getattr(self, "_cached_end", None) is None:
            self._cached_end = self._super_end
        return self._cached_end


cindex.SourceRange = SourceRange


class Diagnostic(cindex.Diagnostic):
    """
    A Diagnostic is a single instance of a Clang diagnostic. It includes the
    diagnostic severity, the message, the location the diagnostic occurred, as
    well as additional source ranges and associated fix-it hints.
    """

    def __hash__(self):
        return hash_value(self.spelling)


cindex.Diagnostic = Diagnostic


class Cursor(cindex.Cursor):
    """
    The Cursor class represents a reference to an element within the AST. It
    acts as a kind of iterator.
    """

    def __hash__(self):
        # libclang already exposes a stable hash for cursors.
        return hash_value(self.hash)

    @cached()
    def get_tokens(self):
        """Obtain Token instances formulating that compose this Cursor.

        This is a generator for Token instances. It returns all tokens which
        occupy the extent this cursor occupies.
        """
        return cindex.TokenGroup.get_tokens(self._tu, self.extent)


cindex.Cursor = Cursor


class Type(cindex.Type):
    """
    The type of an element in the abstract syntax tree.
    """

    def __init__(self, *args, **kwds):
        super().__init__(*args, **kwds)
        self._cached_spelling = None

    def __hash__(self):
        return hash_value(self.spelling)

    _super_spelling = cindex.Type.spelling

    @property  # spelling is non-cachable through @cached since it backs __hash__
    def spelling(self):
        """Retrieve the spelling of this Type."""
        if getattr(self, "_cached_spelling", None) is None:
            self._cached_spelling = self._super_spelling
        return self._cached_spelling


cindex.Type = Type


class TranslationUnit(cindex.TranslationUnit):
    """Represents a source code translation unit.

    This is one of the main types in the API. Any time you wish to interact
    with Clang's representation of a source file, you typically start with a
    translation unit.
    """

    def __init__(self, *args, **kwds):
        super().__init__(*args, **kwds)
        self._cached_spelling = None

    def __hash__(self):
        return hash_value(self.spelling)

    _super_spelling = cindex.TranslationUnit.spelling

    @property  # spelling is non-cachable through @cached since it backs __hash__
    def spelling(self):
        """Get the original translation unit source file name."""
        if getattr(self, "_cached_spelling", None) is None:
            self._cached_spelling = self._super_spelling
        return self._cached_spelling

    # Grab the raw classmethod object so we can wrap it without recursion.
    _super_from_source = cindex.TranslationUnit.__dict__["from_source"]

    @classmethod
    def from_source(cls, filename, cli_args=None, *args, **kwds):
        """Parse *filename*, injecting the detected clang system include path."""
        if cli_args is None:
            cli_args = []
        if cindex.conf.include_path is not None:
            cli_args.append(f"-isystem{cindex.conf.include_path}")
        return cls._super_from_source(filename, cli_args, *args, **kwds)


cindex.TranslationUnit = TranslationUnit


class File(cindex.File):
    """
    The File class represents a particular source file that is part of a
    translation unit.
    """

    def __init__(self, *args, **kwds):
        super().__init__(*args, **kwds)
        self._cached_name = None

    def __hash__(self):
        return hash_value(self.name)

    _super_name = cindex.File.name

    @property  # name is non-cachable through @cached since it backs __hash__
    def name(self):
        """Return the complete file and path name of the file."""
        if getattr(self, "_cached_name", None) is None:
            self._cached_name = self._super_name
        return self._cached_name


cindex.File = File


class Token(cindex.Token):
    """Represents a single token from the preprocessor.

    Tokens are effectively segments of source code. Source code is first
    parsed into tokens before being converted into the AST and Cursors.
    Tokens are obtained from parsed TranslationUnit instances. You currently
    can't create tokens manually.
    """

    def __init__(self, *args, **kwds):
        super().__init__(*args, **kwds)
        self._cached_spelling = None

    def __hash__(self):
        return hash_combine((self._tu, self.spelling))

    _super_spelling = cindex.Token.spelling

    @property  # spelling is non-cachable through @cached since it backs __hash__
    def spelling(self):
        """The spelling of this token.

        This is the textual representation of the token in source.
        """
        if getattr(self, "_cached_spelling", None) is None:
            self._cached_spelling = self._super_spelling
        return self._cached_spelling

    _super_cursor = cindex.Token.cursor

    @cached_property()
    def cursor(self):
        """The Cursor this Token corresponds to."""
        return self._super_cursor


cindex.Token = Token


# Missing functions and types from python-clang
class TargetInfo(cindex.ClangObject):
    """Binding for CXTargetInfo: target triple and pointer width of a TU."""

    @classmethod
    def from_translation_unit(cls, tu):
        """Build a TargetInfo from a parsed TranslationUnit."""
        # BUG FIX: the first parameter of a classmethod is the class; the
        # original named it `self` and hard-coded TargetInfo, breaking
        # subclassing.
        return cls(cindex.conf.lib.clang_getTranslationUnitTargetInfo(tu))

    @cached_property()
    def triple(self):
        """The normalized target triple (e.g. ``x86_64-unknown-linux-gnu``)."""
        return cindex.conf.lib.clang_TargetInfo_getTriple(self)

    @cached_property()
    def pointer_width(self):
        """Pointer width of the target, in bits."""
        return int(cindex.conf.lib.clang_TargetInfo_getPointerWidth(self))

    def __del__(self):
        cindex.conf.lib.clang_TargetInfo_dispose(self)


class Config(cindex.Config):
    """clang configuration extended with include-path discovery."""

    library_include_dir = None

    @cindex.CachedProperty
    def lib(self):
        """Load libclang and register the extended function prototypes."""
        lib = self.get_cindex_library()
        register_functions(lib, not Config.compatibility_check)
        Config.loaded = True
        return lib

    @staticmethod
    def set_include_dir(library_include_dir):
        """Force the clang builtin-include directory instead of guessing it."""
        Config.library_include_dir = library_include_dir

    @cindex.CachedProperty
    def clang_version(self):
        """Parse the libclang version out of clang_getClangVersion(), or None."""
        try:
            lib = self.lib
        except cindex.LibclangError:
            return None
        version = lib.clang_getClangVersion()
        # Last whitespace-separated word, stripping distro suffixes
        # ("x:1.2.3-4ubuntu1" or "1.2.3-4") down to the bare version.
        match = re.search(r"((?:[^:\s]+:([^-\s]*)[^\s]*)|(([^-\s]+)-?(?:[^\s]*)))$", version)
        if not match:
            return None
        version = match.group(2) or match.group(4)
        if not version:
            return None
        return packaging.version.parse(version)

    @cindex.CachedProperty
    def include_path(self):
        """Directory holding clang's builtin headers, or None if unknown."""
        if Config.library_include_dir is not None:
            return Config.library_include_dir
        version = self.clang_version
        if version is None or version.release is None:
            return None
        version = ".".join(map(str, version.release[:2]))
        path = f"/usr/include/clang/{version}"
        if not os.path.exists(path):
            return None
        return path


# monkey-patch the existing cindex.conf singleton's class
cindex.conf.__class__ = Config


_functionList = cindex.functionList + [
    ("clang_getTranslationUnitTargetInfo", [TranslationUnit], cindex.c_object_p),
    ("clang_TargetInfo_dispose", [TargetInfo]),
    (
        "clang_TargetInfo_getTriple",
        [TargetInfo],
        cindex._CXString,
        cindex._CXString.from_result,
    ),
    # BUG FIX: clang_TargetInfo_getPointerWidth takes a CXTargetInfo,
    # not a CXTranslationUnit (it is called with a TargetInfo above).
    ("clang_TargetInfo_getPointerWidth", [TargetInfo], ctypes.c_int),
    ("clang_getClangVersion", [], cindex._CXString, cindex._CXString.from_result),
]


def _monkey_patch_type(_type):
    """Map a clang.cindex/ctypes type to its augmented equivalent, recursively."""
    if getattr(_type, "__module__", None) in (
        "clang.cindex",
        "ctypes",
    ) and isinstance(_type, type):
        if issubclass(_type, ctypes._Pointer):
            _type = ctypes.POINTER(_monkey_patch_type(_type._type_))
        elif issubclass(_type, ctypes._CFuncPtr):
            _type = _monkey_patch_funcptr(_type)
        else:
            _type = globals().get(_type.__name__, _type)
    return _type


_c_functype_cache = {}


def _monkey_patch_funcptr(funcptr):
    """Rebuild a ctypes function-pointer type with patched arg/result types."""
    argtypes = funcptr._argtypes_
    restype = funcptr._restype_
    flags = funcptr._flags_
    try:
        return _c_functype_cache[(restype, argtypes, flags)]
    except KeyError:
        # BUG FIX: the original computed patched_argtypes only when argtypes
        # was truthy, leaving the name unbound (NameError) for
        # zero-argument function pointers.
        patched_argtypes = tuple(map(_monkey_patch_type, argtypes)) if argtypes else argtypes

        class CFunctionType(ctypes._CFuncPtr):
            _argtypes_ = patched_argtypes
            _restype_ = _monkey_patch_type(restype)
            _flags_ = flags

        _c_functype_cache[(restype, argtypes, flags)] = CFunctionType
        return CFunctionType


def _monkey_patch_func(func):
    """Patch a bound libclang function (or functype) in place and return it."""
    if isinstance(func, type) and issubclass(func, ctypes._CFuncPtr):
        return _monkey_patch_funcptr(func)
    if not hasattr(func, "restype"):
        return func
    func.restype = _monkey_patch_type(func.restype)
    if func.argtypes:
        func.argtypes = tuple(map(_monkey_patch_type, func.argtypes))
    return func


def register_functions(lib, ignore_errors):
    """Register function prototypes with a libclang library instance.

    This must be called as part of library instantiation so Python knows how
    to call out to the shared library.
    """
    cindex.callbacks.update(
        {name: _monkey_patch_func(func) for name, func in cindex.callbacks.items()}
    )

    def register(item):
        if len(item) == 4:
            errcheck_func = item[3]
            item = item[:3] + (_monkey_patch_func(errcheck_func),)
        cindex.register_function(lib, item, ignore_errors)
        func = getattr(lib, item[0])
        _monkey_patch_func(func)

    for f in _functionList:
        register(f)


__all__ = cindex.__all__ + [
    "TargetInfo"
]
@@ -68,6 +75,18 @@ def __init__(self, flags): self._unhandled = [] self.fields = {} self.tu = None + local_triple = f"{platform.machine()}-{platform.system()}".lower() + self.target_triple = local_triple + flag_iterator = iter(flags) + flags = [] + for (flag, argument) in itertools.zip_longest(flag_iterator, flag_iterator): + if flag == "-target": + self.target_triple = argument + if self.target_triple == local_triple: + continue + flags.append(flag) + if argument is not None: + flags.append(argument) self.flags = flags self.ctypes_sizes = {} self.init_parsing_options() @@ -76,15 +95,22 @@ def __init__(self, flags): self.typekind_handler = typehandler.TypeHandler(self) self.__filter_location = None self.__processed_location = set() + self._advanced_macro = False + self.interpreter_namespace = {} def init_parsing_options(self): """Set the Translation Unit to skip functions bodies per default.""" self.tu_options = TranslationUnit.PARSE_SKIP_FUNCTION_BODIES - def activate_macros_parsing(self): + def activate_macros_parsing(self, advanced_macro=False): """Activates the detailled code parsing options in the Translation Unit.""" self.tu_options |= TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD + self._advanced_macro = advanced_macro + + @property + def advanced_macro(self): + return self._advanced_macro def activate_comment_parsing(self): """Activates the comment parsing options in the Translation Unit.""" @@ -97,6 +123,17 @@ def filter_location(self, src_files): self.__filter_location = list( map(lambda f: os.path.abspath(f), src_files)) + @cache.cached_pure_method() + def _do_parse(self, filename): + if os.path.abspath(filename) in self.__processed_location: + return None + index = Index.create() + tu = index.parse(filename, self.flags, options=self.tu_options) + if not tu: + log.warning("unable to load input") + return None + return tu + def parse(self, filename): """ . reads 1 file @@ -109,25 +146,27 @@ def parse(self, filename): . 
for each VAR_DECL, register a Variable . for each TYPEREF ?? """ - if os.path.abspath(filename) in self.__processed_location: - return - index = Index.create() - tu = index.parse(filename, self.flags, options=self.tu_options) - if not tu: - log.warning("unable to load input") + self.tu = self._do_parse(filename) + if self.tu is None: return - self._parse_tu_diagnostics(tu, filename) - self.tu = tu + self.ti = TargetInfo.from_translation_unit(self.tu) + self._parse_tu_diagnostics(self.tu, filename) root = self.tu.cursor for node in root.get_children(): self.startElement(node) return - def parse_string(self, input_data, lang='c', all_warnings=False, flags=None): + @cache.cached_pure_method() + def _do_parse_string(self, input_data, lang='c', all_warnings=False, flags=None): """Use this parser on a memory string/file, instead of a file on disk""" tu = util.get_tu(input_data, lang, all_warnings, flags) - self._parse_tu_diagnostics(tu, "memory_input.c") - self.tu = tu + return tu + + def parse_string(self, input_data, lang='c', all_warnings=False, flags=None): + """Use this parser on a memory string/file, instead of a file on disk""" + self.tu = self._do_parse_string(input_data, lang, all_warnings, flags) + self.ti = TargetInfo.from_translation_unit(self.tu) + self._parse_tu_diagnostics(self.tu, "memory_input.c") root = self.tu.cursor for node in root.get_children(): self.startElement(node) @@ -197,7 +236,8 @@ def startElement(self, node): def register(self, name, obj): """Registers an unique type description""" - if (name, obj) in self.all_set: + all_set_key = hash_combine((name, hash(obj))) + if all_set_key in self.all_set and name in self.all: log.debug('register: %s already defined: %s', name, obj.name) return self.all[name] if name in self.all: @@ -214,7 +254,7 @@ def register(self, name, obj): return obj log.debug('register: %s ', name) self.all[name] = obj - self.all_set.add((name, obj)) + self.all_set.add(all_set_key) return obj def get_registered(self, name): 
@@ -225,11 +265,25 @@ def is_registered(self, name): """Checks if a named type description is registered""" return name in self.all + def update_register(self, name, new_obj): + assert self.is_registered(name) + obj = self.all.pop(name) + all_set_key = hash_combine((name, obj)) + try: + self.all_set.remove(all_set_key) + except KeyError: + # leak the previous definition hash in all_set + pass + all_set_key = hash_combine((name, new_obj)) + self.all[name] = new_obj + self.all_set.add(all_set_key) + return new_obj + def remove_registered(self, name): """Removes a named type""" log.debug('Unregister %s', name) - self.all_set.remove((name, self.all[name])) - del self.all[name] + obj = self.all.pop(name) + self.all_set.remove(hash_combine((name, obj))) def make_ctypes_convertor(self, _flags): """ @@ -304,6 +358,12 @@ def get_ctypes_name(self, typekind): def get_ctypes_size(self, typekind): return self.ctypes_sizes[typekind] + def get_pointer_width(self): + return self.ti.pointer_width + + def get_platform_triple(self): + return self.ti.triple + def parse_cursor(self, cursor): """Forward parsing calls to dedicated CursorKind Handlder""" return self.cursorkind_handler.parse_cursor(cursor) @@ -376,11 +436,6 @@ def get_result(self): if location and hasattr(location, 'file'): _item.location = location.file.name, location.line log.error('%s %s came in with a SourceLocation', _id, _item) - elif location is None: - # FIXME make this optional to be able to see internals - # FIXME macro/alias are here - log.warning("No source location in %s - ignoring", _id) - remove.append(_id) for _x in remove: self.remove_registered(_x) @@ -402,4 +457,7 @@ def get_result(self): result.append(i) log.debug("parsed items order: %s", result) - return result + return tuple(result) + + def interprete(self, expr): + preprocess.exec_processed_macro(expr, self.interpreter_namespace) diff --git a/ctypeslib/codegen/codegenerator.py b/ctypeslib/codegen/codegenerator.py index 8c0231a..59fe215 100644 --- 
a/ctypeslib/codegen/codegenerator.py +++ b/ctypeslib/codegen/codegenerator.py @@ -13,17 +13,50 @@ import sys import textwrap from io import StringIO +from collections import defaultdict -from clang.cindex import TypeKind - +from ctypeslib.codegen import cache from ctypeslib.codegen import clangparser from ctypeslib.codegen import typedesc from ctypeslib.codegen import util +from ctypeslib.codegen.preprocess import ( + process_c_literals, + process_macro_function, + replace_builtins, + replace_pointer_types, +) + +log = logging.getLogger('codegen') + + +class GeneratorResult: + def __init__(self): + self._out = defaultdict(StringIO) + + def stream_names(self): + return list(self._out.keys()) + + def streams(self): + for k, v in self._out.items(): + yield (k, v.getvalue()) + + def get(self, name): + return self._out[name].getvalue() + + def update(self, other): + for k, v in other._out.items(): + self._out[k].write(v.getvalue()) -log = logging.getLogger("codegen") + def write(self, name, data, end=None): + if end is None: + end = "\n" + self._out[name].write(data) + if end: + self._out[name].write(end) -class Generator: +class Generator(object): + def __init__( self, output, @@ -65,7 +98,11 @@ def enable_fundamental_type_wrappers(self): to be in the generated code to be valid. 
""" self.enable_fundamental_type_wrappers = lambda: True - headers = pkgutil.get_data("ctypeslib", "data/fundamental_type_name.tpl").decode() + import pkgutil + headers = pkgutil.get_data( + "ctypeslib", + "data/fundamental_type_name.tpl").decode() + from ctypeslib.codegen.cindex import TypeKind size = str(self.parser.get_ctypes_size(TypeKind.LONGDOUBLE) // 8) headers = headers.replace("__LONG_DOUBLE_SIZE__", size) print(headers, file=self.imports) @@ -79,7 +116,10 @@ def enable_pointer_type(self): if not self.cross_arch_code_generation: return "ctypes.POINTER" self.enable_pointer_type = lambda: "POINTER_T" + import pkgutil headers = pkgutil.get_data("ctypeslib", "data/pointer_type.tpl").decode() + import ctypes + from ctypeslib.codegen.cindex import TypeKind # assuming a LONG also has the same sizeof than a pointer. word_size = self.parser.get_ctypes_size(TypeKind.POINTER) // 8 word_type = self.parser.get_ctypes_name(TypeKind.ULONG) @@ -109,21 +149,36 @@ def enable_string_cast(self): headers = headers.replace("__POINTER_TYPE__", self.enable_pointer_type()) print(headers, file=self.imports) + def enable_macro_processing(self): + """ + If a structure type is used, declare our ctypes.Structure extension type + """ + self.enable_macro_processing = lambda: True + if not self.parser.advanced_macro: + return + import pkgutil + shared = pkgutil.get_data('ctypeslib', 'codegen/preprocess.py').decode() + print(shared, file=self.imports) + return + def generate_headers(self, parser): # fix parser in self for later use self.parser = parser - headers = pkgutil.get_data("ctypeslib", "data/headers.tpl").decode() + import pkgutil + headers = pkgutil.get_data('ctypeslib', 'data/headers.tpl').decode() + from ctypeslib.codegen.cindex import TypeKind # get sizes from clang library word_size = self.parser.get_ctypes_size(TypeKind.LONG) // 8 pointer_size = self.parser.get_ctypes_size(TypeKind.POINTER) // 8 longdouble_size = self.parser.get_ctypes_size(TypeKind.LONGDOUBLE) // 8 # 
replacing template values - headers = headers.replace("__FLAGS__", str(self.parser.flags)) + headers = headers.replace("__FLAGS__", str(self.parser.target_triple)) headers = headers.replace("__WORD_SIZE__", str(word_size)) headers = headers.replace("__POINTER_SIZE__", str(pointer_size)) headers = headers.replace("__LONGDOUBLE_SIZE__", str(longdouble_size)) print(headers, file=self.imports) + @cache.cached_pure_method() def type_name(self, t, generate=True): """ Returns a string containing an expression that can be used to @@ -139,22 +194,23 @@ def type_name(self, t, generate=True): # return t.name if isinstance(t, typedesc.FundamentalType): - return self.FundamentalType(t) - if isinstance(t, typedesc.ArrayType): - return "%s * %s" % (self.type_name(t.typ, generate), t.size) - if isinstance(t, typedesc.PointerType) and isinstance(t.typ, typedesc.FunctionType): + return self._get_fundamental_typename(t) + elif isinstance(t, typedesc.ArrayType): + return f"{self.type_name(t.typ, generate)} * {t.size}" + elif isinstance(t, typedesc.PointerType) and isinstance(t.typ, typedesc.FunctionType): return self.type_name(t.typ, generate) - if isinstance(t, typedesc.PointerType): + elif isinstance(t, typedesc.PointerType): pointer_class = self.enable_pointer_type() if t.typ.name in ["c_ubyte", "c_char"]: self.enable_string_cast() - return "%s(%s)" % (pointer_class, self.type_name(t.typ, generate)) - if isinstance(t, typedesc.FunctionType): - args = [self.type_name(x, generate) for x in [t.returns] + list(t.iterArgTypes())] + return f"{pointer_class}({self.type_name(t.typ, generate)})" + elif isinstance(t, typedesc.FunctionType): + args = (self.type_name(x, generate) for x in [t.returns] + list(t.iterArgTypes())) + args = ", ".join(args) if "__stdcall__" in t.attributes: - return "ctypes.WINFUNCTYPE(%s)" % ", ".join(args) + return f"ctypes.WINFUNCTYPE({args})" else: - return "ctypes.CFUNCTYPE(%s)" % ", ".join(args) + return f"ctypes.CFUNCTYPE({args})" # elif isinstance(t, 
typedesc.Structure): # elif isinstance(t, typedesc.Typedef): # elif isinstance(t, typedesc.Union): @@ -166,17 +222,20 @@ def type_name(self, t, generate=True): _aliases = 0 + @cache.cached_pure_method() def Alias(self, alias): """Handles Aliases. No test cases yet""" # FIXME + ret = GeneratorResult() if self.generate_comments: - self.print_comment(alias) - print("%s = %s # alias" % (alias.name, alias.alias), file=self.stream) + ret.update(self.print_comment(alias)) self._aliases += 1 - return + ret.write("stream", f"{alias.name} = {alias.alias}# alias") + return ret _macros = 0 + @cache.cached_pure_method() def Macro(self, macro): """ Handles macro. No test cases else that #defines. @@ -187,70 +246,84 @@ def Macro(self, macro): Just push the macro in comment, and let the rest work away """ + ret = GeneratorResult() if macro.location is None: - log.info("Ignoring %s with no location", macro.name) - return + log.info('Ignoring %s with no location', macro.name) + return ret + self.enable_macro_processing() if self.generate_locations: - print("# %s:%s" % macro.location, file=self.stream) + ret.write("stream", f"# {macro.name}:{macro.location}") if self.generate_comments: - self.print_comment(macro) + ret.update(self.print_comment(macro)) # get tokens types all the way to here ? # 1. clang makes the decision on type casting and validity of data. # let's not try to be clever. # only ignore, undefined references, macro functions... - # 2. or get a flag in macro that tells us if something contains undefinedIdentifier - # is not code-generable ? + # 2. or get a flag in macro that tells us if something contains undefinedIdentifier /is not codegenable ? # codegen should decide what codegen can do. 
- if macro.args: - print("# def %s%s: # macro" % (macro.name, macro.args), file=self.stream) - print("# return %s " % macro.body, file=self.stream) - elif util.contains_undefined_identifier(macro): + macro_args = macro.args + macro_body = macro.body + if util.contains_invalid_code(macro): # we can't handle that, we comment it out - if isinstance(macro.body, typedesc.UndefinedIdentifier): - print("# %s = %s # macro" % (macro.name, macro.body.name), file=self.stream) + if isinstance(macro.body, typedesc.InvalidGeneratedMacro): + ret.write("stream", f"# {macro.name} = {macro.body.code} # macro") + elif isinstance(macro.body, typedesc.UndefinedIdentifier): + ret.write("stream", f"# {macro.name} = {macro.body.name} # macro") else: # we assume it's a list - print("# %s = %s # macro" % (macro.name, " ".join([str(_) for _ in macro.body])), file=self.stream) - elif isinstance(macro.body, bool): - print("%s = %s # macro" % (macro.name, macro.body), file=self.stream) + macro_body = " ".join(str(_) for _ in macro.body) + ret.write("stream", f"# {macro.name} = {macro_body} # macro") + elif macro_args: + if self.parser.advanced_macro: + macro_func = process_macro_function(macro.name, macro.args, macro.body) + if macro_func is not None: + ret.write("stream", f"\n# macro function {macro.name}{macro_func}") + else: + ret.write("stream", f"\n# invalid macro function {macro.name}{macro.body}") + else: + ret.write("stream", f"\n# macro function {macro.name}") + + elif isinstance(macro_body, bool): + ret.write("stream", f"{macro.name} = {macro_body} # macro") + self.macros += 1 + self.names.append(macro.name) + elif isinstance(macro_body, str): + macro_body = macro_body + macro_body = process_c_literals(macro_body, self.parser.get_pointer_width()) + macro_body = replace_builtins(macro_body) + macro_body = replace_pointer_types(macro_body) + # what about integers you ask ? body token that represents token are Integer here. 
+ # either it's just a thing we gonna print, or we need to have a registered item + ret.write("stream", f"{macro.name} = ({macro_body}) # macro") self.macros += 1 self.names.append(macro.name) - elif isinstance(macro.body, str): - if macro.body == "": - print("# %s = %s # macro" % (macro.name, macro.body), file=self.stream) - else: - body = macro.body - float_value = util.from_c_float_literal(body) - if float_value is not None: - body = float_value - # what about integers you ask ? body token that represents token are Integer here. - # either it's just a thing we gonna print, or we need to have a registered item - print("%s = %s # macro" % (macro.name, body), file=self.stream) - self.macros += 1 - self.names.append(macro.name) # This is why we need to have token types all the way here. # but at the same time, clang does not type tokens. So we might as well guess them here too - elif util.body_is_all_string_tokens(macro.body): - print("%s = %s # macro" % (macro.name, " ".join([str(_) for _ in macro.body])), file=self.stream) + elif util.body_is_all_string_tokens(macro_body): + macro_body = "".join(str(_) for _ in macro.body) + ret.write("stream", f"{macro.name} = ({macro_body}) # macro") self.macros += 1 self.names.append(macro.name) + elif macro_body is None: + ret.write("stream", f"# {macro.name} = ({macro_body}) # macro") else: # this might be a token list of float literal - body = macro.body - float_value = util.from_c_float_literal(body) - if float_value is not None: - body = float_value + macro_body = macro_body + macro_body = process_c_literals(macro_body, self.parser.get_pointer_width()) + macro_body = replace_builtins(macro_body) # or anything else that might be a valid python literal... 
- print("%s = %s # macro" % (macro.name, body), file=self.stream) + ret.write("stream", f"{macro.name} = ({macro_body}) # macro") self.macros += 1 self.names.append(macro.name) - return + return ret _typedefs = 0 + @cache.cached() def Typedef(self, tp): + ret = GeneratorResult() if self.generate_comments: - self.print_comment(tp) + ret.update(self.print_comment(tp)) sized_types = { "uint8_t": "c_uint8", "uint16_t": "c_uint16", @@ -262,10 +335,11 @@ def Typedef(self, tp): "int64_t": "c_int64", } name = self.type_name(tp) # tp.name - if isinstance(tp.typ, typedesc.FundamentalType) and tp.name in sized_types: - print("%s = ctypes.%s" % (name, sized_types[tp.name]), file=self.stream) + if (isinstance(tp.typ, typedesc.FundamentalType) and + tp.name in sized_types): + ret.write("stream", f"{name} = ctypes.{sized_types[tp.name]}") self.names.append(tp.name) - return + return ret if tp.typ not in self.done: # generate only declaration code for records ? # if type(tp.typ) in (typedesc.Structure, typedesc.Union): @@ -273,24 +347,27 @@ def Typedef(self, tp): # self.more.add(tp.typ) # else: # self._generate(tp.typ) - self._generate(tp.typ) + ret.update(self._generate(tp.typ)) # generate actual typedef code. if tp.name != self.type_name(tp.typ): - print("%s = %s" % (name, self.type_name(tp.typ)), file=self.stream) + ret.write("stream", f"{name} = {self.type_name(tp.typ)}") if isinstance(tp.typ, typedesc.Enumeration): - print("%s__enumvalues = %s__enumvalues" % (name, self.type_name(tp.typ)), file=self.stream) - self.names.append("%s__enumvalues" % name) + ret.write( + "stream", + f"{name}__enumvalues = {self.type_name(tp.typ)}__enumvalues" + ) + self.names.append(f"{name}__enumvalues") self.names.append(tp.name) self._typedefs += 1 - return + return ret def _get_real_type(self, tp): # FIXME, kinda useless really. 
if isinstance(tp, typedesc.Typedef): if isinstance(tp.typ, typedesc.Typedef): - raise TypeError("Nested loop in Typedef %s" % tp.name) + raise TypeError(f"Nested loop in Typedef {tp.name}") return self._get_real_type(tp.typ) elif isinstance(tp, typedesc.CvQualifiedType): return self._get_real_type(tp.typ) @@ -298,60 +375,76 @@ def _get_real_type(self, tp): _arraytypes = 0 + @cache.cached() def ArrayType(self, tp): - self._generate(self._get_real_type(tp.typ)) - self._generate(tp.typ) + ret = GeneratorResult() + ret.update(self._generate(self._get_real_type(tp.typ))) + ret.update(self._generate(tp.typ)) self._arraytypes += 1 + return ret _functiontypes = 0 _notfound_functiontypes = 0 + @cache.cached() def FunctionType(self, tp): - self._generate(tp.returns) - self.generate_all(tp.arguments) + ret = GeneratorResult() + ret.update(self._generate(tp.returns)) + ret.update(self.generate_all(tp.arguments)) # print >> self.stream, "%s = %s # Functiontype " % ( # self.type_name(tp), [self.type_name(a) for a in tp.arguments]) self._functiontypes += 1 + return ret + @cache.cached() def Argument(self, tp): - self._generate(tp.typ) + return self._generate(tp.typ) _pointertypes = 0 + @cache.cached() def PointerType(self, tp): + ret = GeneratorResult() # print 'generate', tp.typ if isinstance(tp.typ, typedesc.PointerType): - self._generate(tp.typ) + ret.update(self._generate(tp.typ)) elif type(tp.typ) in (typedesc.Union, typedesc.Structure): - self._generate(tp.typ.get_head()) + ret.update(self._generate(tp.typ.get_head())) self.more[tp.typ] = True elif isinstance(tp.typ, typedesc.Typedef): - self._generate(tp.typ) + ret.update(self._generate(tp.typ)) else: - self._generate(tp.typ) + ret.update(self._generate(tp.typ)) self._pointertypes += 1 + return ret + @cache.cached() def CvQualifiedType(self, tp): - self._generate(tp.typ) + return self._generate(tp.typ) _variables = 0 _notfound_variables = 0 + @cache.cached() def Variable(self, tp): + ret = GeneratorResult() self._variables 
+= 1 if self.generate_comments: - self.print_comment(tp) + ret.update(self.print_comment(tp)) # 2021-02 give me a test case for this. it breaks all extern variables otherwise. if tp.extern and self.find_library_with_func(tp): dll_library = self.find_library_with_func(tp) - self._generate(tp.typ) + ret.update(self._generate(tp.typ)) # calling convention does not matter for in_dll... - libname = self.get_sharedlib(dll_library, "cdecl") - print("%s = (%s).in_dll(%s, '%s')" % (tp.name, self.type_name(tp.typ), libname, tp.name), file=self.stream) + libname = self.get_sharedlib(ret, dll_library, "cdecl") + ret.write( + "stream", + f"{tp.name} = ({self.type_name(tp.typ)}).in_dll({libname}, '{tp.name}')" + ) self.names.append(tp.name) # wtypes.h contains IID_IProcessInitControl, for example - return + return ret # Hm. The variable MAY be a #define'd symbol that we have # artifically created, or it may be an exported variable that @@ -363,10 +456,10 @@ def Variable(self, tp): # return # el if isinstance(tp.init, typedesc.FunctionType): - _args = [x for x in tp.typ.iterArgNames()] - print("%s = %s # args: %s" % (tp.name, self.type_name(tp.init), _args), file=self.stream) + func_args = tuple(x for x in tp.typ.iterArgNames()) + ret.write("stream", f"{tp.name} = {self.type_name(tp.init)} # args: {func_args}") self.names.append(tp.name) - return + return ret elif isinstance(tp.typ, typedesc.PointerType) or isinstance(tp.typ, typedesc.ArrayType): if isinstance(tp.typ.typ, typedesc.FundamentalType) and ( tp.typ.typ.name in ["c_ubyte", "c_char", "c_wchar"] @@ -381,7 +474,8 @@ def Variable(self, tp): # array of number # CARE: size of elements must match size of array # init_value = repr(tp.init) - init_value = "[%s]" % ",".join([str(x) for x in tp.init]) + init_value = ",".join(str(x) for x in tp.init) + init_value = f"[{init_value}]" # we do NOT want Variable to be described as ctypes object # when we can have a python abstraction for them. 
# init_value_type = self.type_name(tp.typ, False) @@ -423,40 +517,49 @@ def Variable(self, tp): init_value = 0 # # print it out - print("%s = %s # Variable %s" % (tp.name, init_value, self.type_name(tp.typ, False)), file=self.stream) + ret.write( + "stream", + f"{tp.name} = {init_value} # Variable {self.type_name(tp.typ, False)}" + ) # self.names.append(tp.name) + return ret _enumvalues = 0 + @cache.cached_pure_method() def EnumValue(self, tp): # FIXME should be in parser + ret = GeneratorResult() value = int(tp.value) - print("%s = %d" % (tp.name, value), file=self.stream) + ret.write("stream", f"{tp.name} = {value}") self.names.append(tp.name) self._enumvalues += 1 + return ret _enumtypes = 0 + @cache.cached_pure_method() def Enumeration(self, tp): + ret = GeneratorResult() if self.generate_comments: - self.print_comment(tp) - print("", file=self.stream) + ret.update(self.print_comment(tp)) + ret.write("stream", u'') if tp.name: - print("# values for enumeration '%s'" % tp.name, file=self.stream) + ret.write("stream", f"# values for enumeration '{tp.name}'") else: - print("# values for unnamed enumeration", file=self.stream) - print("%s__enumvalues = {" % tp.name, file=self.stream) + ret.write("stream", "# values for unnamed enumeration") + ret.write("stream", f"{tp.name}__enumvalues = {{") for item in tp.values: - print(" %s: '%s'," % (int(item.value), item.name), file=self.stream) - print("}", file=self.stream) + ret.write("stream", f" {int(item.value)}: '{item.name}',") + ret.write("stream", "}") # Some enumerations have the same name for the enum type # and an enum value. Excel's XlDisplayShapes is such an example. # Since we don't have separate namespaces for the type and the values, # we generate the TYPE last, overwriting the value. XXX for item in tp.values: - self._generate(item) + ret.update(self._generate(item)) if tp.name: # Enums can be forced to occupy less space than an int when the compiler flag '-fshort-enums' is set. 
# The size adjustment is done when possible, depending on the values of the enum. @@ -488,9 +591,10 @@ def Enumeration(self, tp): else: enum_ctype = 'ctypes.c_int' if has_negative else 'ctypes.c_uint' - print("%s = %s # enum" % (tp.name, enum_ctype), file=self.stream) + ret.write("stream", f"{tp.name} = {enum_ctype} # enum") self.names.append(tp.name) self._enumtypes += 1 + return ret def get_undeclared_type(self, item): """ @@ -540,18 +644,20 @@ def _get_undefined_body_dependencies(self, struct): _structures = 0 + @cache.cached() def Structure(self, struct): + ret = GeneratorResult() if struct.name in self.head_generated and struct.name in self.body_generated: self.done[struct] = True - return + return ret self.enable_structure_type() self._structures += 1 depends = set() # We only print a empty struct. if struct.members is None: - log.info("No members for: %s", struct.name) - self._generate(struct.get_head(), False) - return + log.info('No members for: %s', struct.name) + ret.update(self._generate(struct.get_head(), False)) + return ret # look in bases class for dependencies # FIXME - need a real dependency graph maker # remove myself, just in case. @@ -564,7 +670,7 @@ def Structure(self, struct): if len(depends) > 0: log.debug("Generate %s DEPENDS for Bases %s", struct.name, depends) for dep in depends: - self._generate(dep) + ret.update(self._generate(dep)) # checks members dependencies # test_record_ordering head does not mean declared. 
_fields_ mean declared @@ -584,69 +690,74 @@ def Structure(self, struct): # generate this head and body in one go # if struct.get_head() not in self.done: if struct.name not in self.head_generated: - self._generate(struct.get_head(), True) - self._generate(struct.get_body(), True) + ret.update(self._generate(struct.get_head(), True)) + ret.update(self._generate(struct.get_body(), True)) else: - self._generate(struct.get_body(), False) + ret.update(self._generate(struct.get_body(), False)) else: # generate this head first, to avoid recursive issue, then the dep, then this body - self._generate(struct.get_head(), False) + ret.update(self._generate(struct.get_head(), False)) for dep in undefined_head_dependencies: - self._generate(dep) - self._generate(struct.get_body(), False) + ret.update(self._generate(dep)) + ret.update(self._generate(struct.get_body(), False)) else: # hard dep on defining the body of these dependencies # generate this head first, to avoid recursive issue, then the dep, then this body - self._generate(struct.get_head(), False) + ret.update(self._generate(struct.get_head(), False)) for dep in undefined_head_dependencies: - self._generate(dep) + ret.update(self._generate(dep)) for dep in undefined_body_dependencies: - self._generate(dep) + ret.update(self._generate(dep)) for dep in undefined_body_dependencies: if isinstance(dep, typedesc.Structure): - self._generate(dep.get_body(), False) - self._generate(struct.get_body(), False) + ret.update(self._generate(dep.get_body(), False)) + ret.update(self._generate(struct.get_body(), False)) + # we defined ourselve self.done[struct] = True + return ret + Union = Structure + @cache.cached() def StructureHead(self, head, inline=False): + ret = GeneratorResult() if head.name in self.head_generated: log.debug("Skipping - Head already generated for %s", head.name) - return + return ret log.debug("Head start for %s inline:%s", head.name, inline) for struct in head.struct.bases: - 
self._generate(struct.get_head()) + ret.update(self._generate(struct.get_head())) # add dependencies self.more[struct] = True basenames = [self.type_name(b) for b in head.struct.bases] if basenames: # method_names = [m.name for m in head.struct.members if type(m) is typedesc.Method] - print( - "class %s(%s):" % (head.struct.name, ", ".join(basenames)), - file=self.stream, - ) + ret.write("stream", f"class {head.struct.name}({', '.join(basenames)}):") else: # methods = [m for m in head.struct.members if type(m) is typedesc.Method] if isinstance(head.struct, typedesc.Structure): # Inherit from our ctypes.Structure extension - print("class %s(Structure):" % head.struct.name, file=self.stream) + ret.write("stream", f"class {head.struct.name}(Structure):") elif isinstance(head.struct, typedesc.Union): - print("class %s(Union):" % head.struct.name, file=self.stream) + ret.write("stream", f"class {head.struct.name}(Union):") if not inline: - print(" pass\n", file=self.stream) + ret.write("stream", " pass\n") # special empty struct if inline and not head.struct.members: - print(" pass\n", file=self.stream) + ret.write("stream", " pass\n") self.names.append(head.struct.name) log.debug("Head finished for %s", head.name) self.head_generated.add(head.name) + return ret + @cache.cached() def StructureBody(self, body, inline=False): + ret = GeneratorResult() if body.name in self.body_generated: log.debug("Skipping - Body already generated for %s", body.name) - return + return ret log.debug("Body start for %s", body.name) fields = [] methods = [] @@ -665,7 +776,7 @@ def StructureBody(self, body, inline=False): # handled inline Vs dependent log.debug("body inline:%s for structure %s", inline, body.struct.name) if not inline: - prefix = "%s." % body.struct.name + prefix = f"{body.struct.name}." 
else: prefix = " " if methods: @@ -675,14 +786,15 @@ def StructureBody(self, body, inline=False): # LXJ: we pack all the time, because clang gives a precise field offset # per target architecture. No need to defer to ctypes logic for that. if fields: - print("%s_pack_ = 1 # source:%s" % (prefix, body.struct.packed), file=self.stream) + ret.write("stream", f"{prefix}_pack_ = True # source:{body.struct.packed}") if body.struct.bases: if len(body.struct.bases) == 1: # its a Struct or a simple Class - self._generate(body.struct.bases[0].get_body(), inline) + ret.update( + self._generate(body.struct.bases[0].get_body(), inline)) else: # we have a multi-parent inheritance for b in body.struct.bases: - self._generate(b.get_body(), inline) + ret.update(self._generate(b.get_body(), inline)) # field definition normally span several lines. # Before we generate them, we need to 'import' everything they need. # So, call type_name for each field once, @@ -694,45 +806,51 @@ def StructureBody(self, body, inline=False): for f in fields: # _anonymous_ fields are fields of type Structure or Union, # that have no name. 
- if not f.name and isinstance(f.type, (typedesc.Structure, typedesc.Union)): - unnamed_fields[f] = "_%d" % len(unnamed_fields) + if not f.name and isinstance( + f.type, (typedesc.Structure, typedesc.Union)): + unnamed_fields[f] = f"_{len(unnamed_fields)}" if unnamed_fields: - print( - "%s_anonymous_ = %r" % (prefix, unnamed_fields.values()), - file=self.stream, - ) + ret.write("stream", f"{prefix}_anonymous_ = {unnamed_fields.values()}") if len(fields) > 0: - print("%s_fields_ = [" % prefix, file=self.stream) + ret.write("stream", f"{prefix}_fields_ = [") + if self.generate_locations and body.struct.location: - print(" # %s %s" % body.struct.location, file=self.stream) + ret.write("stream", f" # {body.struct.name} {body.struct.location}") + index = 0 for f in fields: + if inline: + ret.write("stream", " ", end='') fieldname = unnamed_fields.get(f, f.name) type_name = self.type_name(f.type) # handle "__" prefixed names by using a wrapper if type_name.startswith("__"): - type_name = "globals()['%s']" % type_name + type_name = f"globals()['{type_name}']" # a bitfield needs a triplet if f.is_bitfield is False: - print(" ('%s', %s)," % (fieldname, type_name), file=self.stream) + ret.write("stream", f" ('{fieldname}', {type_name}),") else: # FIXME: Python bitfield is int32 only. 
# from clang.cindex import TypeKind # print fieldname # import code # code.interact(local=locals()) - print(" ('%s', %s, %s)," % (fieldname, self.type_name(f.type), f.bits), file=self.stream) + ret.write( + "stream", + f" ('{fieldname}', {self.type_name(f.type)}, {f.bits})," + ) if inline: - print(prefix, end=" ", file=self.stream) - print("]\n", file=self.stream) - log.debug("Body finished for %s", body.name) + ret.write("stream", " ", end='') + ret.write("stream", "]\n") + log.debug('Body finished for %s', body.name) self.body_generated.add(body.name) + return ret def find_library_with_func(self, func): if hasattr(func, "dllname"): return func.dllname name = func.name if os.name == "posix" and sys.platform == "darwin": - name = "_%s" % name + name = f"_{name}" for dll in self.searched_dlls: try: getattr(dll, name) @@ -745,84 +863,114 @@ def find_library_with_func(self, func): _c_libraries = None def need_CLibraries(self): - # Create a '_libraries' doctionary in the generated code, if + ret = GeneratorResult() + # Create a '_libraries' dictionary in the generated code, if # it not yet exists. Will map library pathnames to loaded libs. if self._c_libraries is None: self._c_libraries = {} - print("_libraries = {}", file=self.imports) + ret.write("imports", "_libraries = {}") + return ret _stdcall_libraries = None def need_WinLibraries(self): # Create a '_stdcall_libraries' doctionary in the generated code, if # it not yet exists. Will map library pathnames to loaded libs. 
+ ret = GeneratorResult() if self._stdcall_libraries is None: self._stdcall_libraries = {} - print("_stdcall_libraries = {}", file=self.imports) + ret.write("imports", "_stdcall_libraries = {}") + return ret _dll_stub_issued = False - def get_sharedlib(self, library, cc, stub=False): + def get_sharedlib(self, ret, library, cc, stub=False): # deal with missing -l with a stub stub_comment = "" + library_name = repr(library._name) + library_filepath = repr(library._filepath) if stub and not self._dll_stub_issued: self._dll_stub_issued = True stub_comment = " FunctionFactoryStub() # " - print("""class FunctionFactoryStub: - def __getattr__(self, _): - return ctypes.CFUNCTYPE(lambda y:y) -""", file=self.imports) - print("# libraries['FIXME_STUB'] explanation", file=self.imports) - print("# As you did not list (-l libraryname.so) a library that exports this function", file=self.imports) - print("# This is a non-working stub instead. ", file=self.imports) - print("# You can either re-run clan2py with -l /path/to/library.so",file=self.imports) - print("# Or manually fix this by comment the ctypes.CDLL loading", file=self.imports) + ret.write("imports", textwrap.dedent(""" + class FunctionFactoryStub: + def __getattr__(self, _): + return ctypes.CFUNCTYPE(lambda y:y) + """)) + ret.write("imports", "# libraries['FIXME_STUB'] explanation") + ret.write( + "imports", + "# As you did not list (-l libraryname.so) a library that " + "exports this function" + ) + ret.write("imports", "# This is a non-working stub instead. 
") + ret.write( + "imports", + "# You can either re-run clan2py with -l /path/to/library.so" + ) + ret.write( + "imports", + "# Or manually fix this by comment the ctypes.CDLL loading" + ) # generate windows call if cc == "stdcall": - self.need_WinLibraries() + ret.update(self.need_WinLibraries()) if library._name not in self._stdcall_libraries: - _ = "_stdcall_libraries[%r] =%s ctypes.WinDLL(%r)" % (library._name, stub_comment, library._filepath) - print(_, file=self.imports) + ret.write( + "imports", + f"_stdcall_libraries[{library_name}] " + f"={stub_comment} ctypes.WinDLL({library_filepath})" + ) + self._stdcall_libraries[library._name] = None - return "_stdcall_libraries[%r]" % library._name + return f"_stdcall_libraries[{library_name}]" # generate clinux call - self.need_CLibraries() + ret.update(self.need_CLibraries()) if self.preloaded_dlls != []: global_flag = ", mode=ctypes.RTLD_GLOBAL" else: global_flag = "" if library._name not in self._c_libraries: - print("_libraries[%r] =%s ctypes.CDLL(%r%s)" % (library._name, stub_comment, library._filepath, global_flag), - file=self.imports) + ret.write( + "imports", + f"_libraries[{library_name}] ={stub_comment} ctypes.CDLL({library_name}{global_flag})" + ) self._c_libraries[library._name] = None - return "_libraries[%r]" % library._name + return f"_libraries[{library_name}]" _STRING_defined = False def need_STRING(self): + ret = GeneratorResult() if self._STRING_defined: return - print("STRING = c_char_p", file=self.imports) + ret.write("imports", "STRING = c_char_p") self._STRING_defined = True - return + return ret _WSTRING_defined = False def need_WSTRING(self): + ret = GeneratorResult() if self._WSTRING_defined: return - print("WSTRING = c_wchar_p", file=self.imports) + ret.write("imports", "WSTRING = c_wchar_p") self._WSTRING_defined = True - return + return ret + _functiontypes = 0 + _notfound_functiontypes = 0 + + @cache.cached() def Function(self, func): + ret = GeneratorResult() # FIXME: why do we call 
this ? it does nothing if self.generate_comments: - self.print_comment(func) - self._generate(func.returns) - self.generate_all(func.iterArgTypes()) + ret.update(self.print_comment(func)) + ret.update(self._generate(func.returns)) + ret.update(self.generate_all(func.iterArgTypes())) # useful code args = [self.type_name(a) for a in func.iterArgTypes()] @@ -833,28 +981,25 @@ def Function(self, func): # library = self.find_library_with_func(func) if library: - libname = self.get_sharedlib(library, cc) + libname = self.get_sharedlib(ret, library, cc) else: class LibraryStub: _filepath = "FIXME_STUB" _name = "FIXME_STUB" - libname = self.get_sharedlib(LibraryStub(), cc, stub=True) + libname = self.get_sharedlib(ret, LibraryStub(), cc, stub=True) - argnames = [a or "p%d" % (i + 1) for i, a in enumerate(func.iterArgNames())] + argnames = tuple(a or f"p{i + 1}"for i, a in enumerate(func.iterArgNames())) if self.generate_locations and func.location: - print("# %s %s" % func.location, file=self.stream) + ret.write("stream", f"# {func.name} {func.location}") # Generate the function decl code - print("%s = %s.%s" % (func.name, libname, func.name), file=self.stream) - print( - "%s.restype = %s" % (func.name, self.type_name(func.returns)), - file=self.stream, - ) + ret.write("stream", f"{func.name} = {libname}.{func.name}") + ret.write("stream", f"{func.name}.restype = {self.type_name(func.returns)}") if self.generate_comments: - print("# %s(%s)" % (func.name, ", ".join(argnames)), file=self.stream) - print("%s.argtypes = [%s]" % (func.name, ", ".join(args)), file=self.stream) + ret.write("stream", f"# {func.name}({', '.join(argnames)})") + ret.write("stream", f"{func.name}.argtypes = [{', '.join(args)}]") if self.generate_docstrings: @@ -867,23 +1012,28 @@ def typeString(typ): return "unknown" argsAndTypes = zip([typeString(t) for t in func.iterArgTypes()], argnames) - print( - '{funcname}.__doc__ = """{ret} {funcname}({args})\n' - ' {file}:{line}"""'.format( - 
funcname=func.name, - args=", ".join(["%s %s" % i for i in argsAndTypes]), - file=func.location[0], - line=func.location[1], - ret=typeString(func.returns), - ), - file=self.stream, + argsAndTypes = ", ".join([f"{i[0]} {i[1]}" for i in argsAndTypes]) + file = func.location[0] + line = func.location[1] + ret.write( + "stream", + f'{func.name}.__doc__ = """{func.returns} {func.name}({argsAndTypes})\n' + f' {file}:{line}"""'.format( + ) ) self.names.append(func.name) self._functiontypes += 1 - return + return ret + @cache.cached_pure_method() def FundamentalType(self, _type): + ret = GeneratorResult() + self._get_fundamental_typename(_type) + # there is actually nothing to generate here for FundamentalType + return ret + + def _get_fundamental_typename(self, _type): """Returns the proper ctypes class name for a fundamental type 1) activates generation of appropriate headers for @@ -895,72 +1045,89 @@ def FundamentalType(self, _type): if _type.name in ["None", "c_long_double_t", "c_uint128", "c_int128"]: self.enable_fundamental_type_wrappers() return _type.name - return "ctypes.%s" % _type.name + + return f"ctypes.{_type.name}" ######## + @cache.cached() def _generate(self, item, *args): """ wraps execution of specific methods.""" + ret = GeneratorResult() if item in self.done: - return + return ret # verbose output with location. if self.generate_locations and item.location: - print("# %s:%d" % item.location, file=self.stream) + ret.write("stream," f"# {item.name or item}:{item.location}") if self.generate_comments: - self.print_comment(item) + ret.update(self.print_comment(item)) log.debug("generate %s, %s", item.__class__.__name__, item.name) # to avoid infinite recursion, we have to mark it as done # before actually generating the code. 
self.done[item] = True # go to specific treatment mth = getattr(self, type(item).__name__) - mth(item, *args) - return + ret.update(mth(item, *args)) + return ret + @cache.cached_pure_method() def print_comment(self, item): + ret = GeneratorResult() if item.comment is None: return - for _ in textwrap.wrap(item.comment, 78): - print("# %s" % _, file=self.stream) - return + for comment in textwrap.wrap(item.comment, 78): + ret.write("stream", f"# {comment}") + return ret def generate_all(self, items): + ret = GeneratorResult() for item in items: - self._generate(item) - return + ret.update(self._generate(item)) + return ret - def generate_items(self, items): + def generate_items(self, items, verbose=False): # items = set(items) + ret = GeneratorResult() loops = 0 + items = list(items) while items: loops += 1 self.more = collections.OrderedDict() - self.generate_all(items) + ret.update(self.generate_all(tuple(items))) # items |= self.more , but keeping ordering _s = set(items) - [items.append(k) for k in self.more.keys() if k not in _s] + for k in self.more.keys(): + if k not in _s: + items.append(k) # items -= self.done, but keep ordering - _done = self.done.keys() + _done = self.done for i in list(items): if i in _done: items.remove(i) - return loops + if verbose: + log.info("needed %d loop(s)" % loops) + return ret - def generate(self, parser, items): + def generate(self, parser, items, verbose=False): self.generate_headers(parser) self.generate_code(items) - def generate_code(self, items): - print( - "\n".join( - ["ctypes.CDLL('%s', ctypes.RTLD_GLOBAL)" % preloaded_dll for preloaded_dll in self.preloaded_dlls] - ), - file=self.imports, + def generate_code(self, items, verbose=False): + ret = GeneratorResult() + ret.write( + "imports", + "\n".join([ + f"ctypes.CDLL({preloaded_dll}', ctypes.RTLD_GLOBAL)" + for preloaded_dll + in self.preloaded_dlls] + ) ) - loops = self.generate_items(items) + ret.update(self.generate_items(items, verbose=verbose)) + 
print(ret.get("imports"), file=self.imports) + print(ret.get("stream"), file=self.stream) self.output.write(self.imports.getvalue()) self.output.write("\n\n") @@ -970,25 +1137,18 @@ def generate_code(self, items): # text Wrapper doesn't work for the first line in certain cases. print(text, file=self.output) # doesn't work for the first line in certain cases. - wrapper = textwrap.TextWrapper(break_long_words=False, initial_indent=" ", subsequent_indent=" ") - text = "[%s]" % ", ".join([repr(str(n)) for n in sorted(self.names)]) + wrapper = textwrap.TextWrapper(break_long_words=False, initial_indent=" ", + subsequent_indent=" ") + text = f"[{', '.join([repr(str(n)) for n in sorted(self.names)])}]" for line in wrapper.wrap(text): print(line, file=self.output) - return loops - def print_stats(self, stream): - total = ( - self._structures - + self._functiontypes - + self._enumtypes - + self._typedefs - + self._pointertypes - + self._arraytypes - ) + total = self._structures + self._functiontypes + self._enumtypes + self._typedefs + \ + self._pointertypes + self._arraytypes print("###########################", file=stream) - print("# Symbols defined:", file=stream) - print("#", file=stream) + print("# Symbols defined:") + print("#") print("# Variables: %5d" % self._variables, file=stream) print("# Struct/Unions: %5d" % self._structures, file=stream) print("# Functions: %5d" % self._functiontypes, file=stream) @@ -1002,7 +1162,6 @@ def print_stats(self, stream): print("#", file=stream) print("# Total symbols: %5d" % total, file=stream) print("###########################", file=stream) - return ################################################################ @@ -1023,13 +1182,14 @@ def generate_code( generate_locations=False, filter_location=False, flags=None, + advanced_macro=False, ): # expressions is a sequence of compiled regular expressions, # symbols is a sequence of names parser = clangparser.Clang_Parser(flags or []) # if macros are not needed, use a faster 
TranslationUnit if typedesc.Macro in types: - parser.activate_macros_parsing() + parser.activate_macros_parsing(advanced_macro) if generate_comments is True: parser.activate_comment_parsing() @@ -1102,7 +1262,6 @@ def generate_code( # add some headers and ctypes import gen.generate_headers(parser) # make the structures - loops = gen.generate_code(items) + gen.generate_code(tuple(items), verbose) if verbose: gen.print_stats(sys.stderr) - log.info("needed %d loop(s)", loops) diff --git a/ctypeslib/codegen/cursorhandler.py b/ctypeslib/codegen/cursorhandler.py index 46fc5d0..e3369f7 100644 --- a/ctypeslib/codegen/cursorhandler.py +++ b/ctypeslib/codegen/cursorhandler.py @@ -3,18 +3,103 @@ import logging import re -from clang.cindex import CursorKind, LinkageKind, TypeKind, TokenKind +from ctypeslib.codegen.cindex import CursorKind, LinkageKind, TypeKind, TokenKind from ctypeslib.codegen import typedesc from ctypeslib.codegen.handler import ClangHandler from ctypeslib.codegen.handler import CursorKindException from ctypeslib.codegen.handler import DuplicateDefinitionException from ctypeslib.codegen.handler import InvalidDefinitionError -from ctypeslib.codegen.util import log_entity +from ctypeslib.codegen.cache import cached_pure_method +from ctypeslib.codegen.preprocess import ( + is_identifier, + from_c_int_literal, + from_c_float_literal, + from_c_string_literal, + process_c_literals, + process_macro_function, + remove_outermost_parentheses, +) +from ctypeslib.codegen.util import ( + contains_invalid_code, + expand_macro_function, + log_entity, +) + log = logging.getLogger('cursorhandler') +class CursorTokens: + def __init__(self, tokens): + self._tokens = list(tokens) + self._index = 0 + + @property + def index(self): + return self._index + + def __len__(self): + return len(self._tokens) + + def __iter__(self): + return iter(self._tokens) + + def __getitem__(self, i): + return self._tokens[i] + + def __bool__(self): + return self._index < len(self._tokens) + + 
@property + def current(self): + if not self: + return None + return self._tokens[self._index] + + def consume(self, count=1): + if self: + ret = self.current + self._index += count + return ret + + def consume_lit(self, lit): + if self.current.spelling == lit: + self.consume() + return True + return False + + +CharTypes = [ + TypeKind.CHAR_U, + TypeKind.UCHAR, + TypeKind.CHAR16, + TypeKind.CHAR32, + TypeKind.CHAR_S, + TypeKind.SCHAR, + TypeKind.WCHAR, +] + +IntegerTypes = [ + TypeKind.USHORT, + TypeKind.UINT, + TypeKind.ULONG, + TypeKind.ULONGLONG, + TypeKind.UINT128, + TypeKind.SHORT, + TypeKind.INT, + TypeKind.LONG, + TypeKind.LONGLONG, + TypeKind.INT128, +] + +FloatTypes = [ + TypeKind.FLOAT, + TypeKind.DOUBLE, + TypeKind.LONGDOUBLE, +] + + class CursorHandler(ClangHandler): """ Factory objects that handles Cursor Kind and transform them into typedesc. @@ -33,8 +118,10 @@ class CursorHandler(ClangHandler): def __init__(self, parser): ClangHandler.__init__(self, parser) + @cached_pure_method() def parse_cursor(self, cursor): - mth = getattr(self, cursor.kind.name) + name = cursor.kind.name + mth = getattr(self, name) return mth(cursor) ########################################################################## @@ -82,11 +169,26 @@ def UNEXPOSED_EXPR(self, cursor): def DECL_REF_EXPR(self, cursor): return cursor.displayname + def _cast_list_expr(self, type_kind, value): + if not isinstance(value, str): + return value + try: + if type_kind in IntegerTypes: + return from_c_int_literal(value, self.parser.get_pointer_width()) + elif type_kind in FloatTypes: + return from_c_float_literal(value) + elif type_kind in CharTypes: + return value + except ValueError: + return value + @log_entity def INIT_LIST_EXPR(self, cursor): """Returns a list of literal values.""" values = [self.parse_cursor(child) for child in list(cursor.get_children())] + element_type = cursor.type.get_array_element_type().kind + values = list(map(lambda v: self._cast_list_expr(element_type, v), 
values)) return values ################################ @@ -148,9 +250,10 @@ def ENUM_DECL(self, cursor): return self.get_registered(name) align = cursor.type.get_align() size = cursor.type.get_size() - obj = self.register(name, typedesc.Enumeration(name, size, align)) + obj = typedesc.Enumeration(name, size, align) self.set_location(obj, cursor) self.set_comment(obj, cursor) + obj = self.register(name, obj) # parse all children for child in cursor.get_children(): self.parse_cursor(child) # FIXME, where is the starElement @@ -167,15 +270,16 @@ def FUNCTION_DECL(self, cursor): attributes = [] extern = False obj = typedesc.Function(name, returns, attributes, extern) + obj = self.register(name, obj) for arg in cursor.get_arguments(): arg_obj = self.parse_cursor(arg) # if arg_obj is None: # code.interact(local=locals()) obj.add_argument(arg_obj) # code.interact(local=locals()) - self.register(name, obj) self.set_location(obj, cursor) self.set_comment(obj, cursor) + obj = self.update_register(name, obj) return obj @log_entity @@ -183,7 +287,7 @@ def PARM_DECL(self, cursor): """Handles parameter declarations.""" # try and get the type. If unexposed, The canonical type will work. 
_type = cursor.type - _name = cursor.spelling + name = cursor.spelling if (self.is_array_type(_type) or self.is_fundamental_type(_type) or self.is_pointer_type(_type) or @@ -197,7 +301,7 @@ def PARM_DECL(self, cursor): _argtype = self.parse_cursor_type(_type) else: _argtype = self.get_registered(_argtype_name) - obj = typedesc.Argument(_name, _argtype) + obj = typedesc.Argument(name, _argtype) self.set_location(obj, cursor) self.set_comment(obj, cursor) return obj @@ -230,9 +334,10 @@ def TYPEDEF_DECL(self, cursor): 'Bad TYPEREF parsing in TYPEDEF_DECL: %s' % (_type.spelling)) # register the type - obj = self.register(name, typedesc.Typedef(name, p_type)) + obj = typedesc.Typedef(name, p_type) self.set_location(obj, cursor) self.set_comment(obj, cursor) + obj = self.register(name, obj) return obj @log_entity @@ -252,9 +357,10 @@ def VAR_DECL(self, cursor): log.debug('VAR_DECL: _type:%s', _type.name) log.debug('VAR_DECL: _init:%s', init_value) log.debug('VAR_DECL: location:%s', getattr(cursor, 'location')) - obj = self.register(name, typedesc.Variable(name, _type, init_value, extern)) + obj = typedesc.Variable(name, _type, init_value, extern) self.set_location(obj, cursor) self.set_comment(obj, cursor) + obj = self.register(name, obj) return True def _VAR_DECL_type(self, cursor): @@ -278,7 +384,8 @@ def _VAR_DECL_type(self, cursor): # for example, extern Function pointer if self.is_unexposed_type(_ctype.get_pointee()): _type = self.parse_cursor_type( - _ctype.get_canonical().get_pointee()) + _ctype.get_canonical().get_pointee() + ) elif _ctype.get_pointee().kind == TypeKind.FUNCTIONPROTO: # Function pointers # Arguments are handled in here @@ -332,8 +439,18 @@ def countof(k, l): init_value = None else: log.debug('VAR_DECL: default init_value: %s', init_value) + + def cast_value(cursor_kind, value): + if cursor_kind == CursorKind.INTEGER_LITERAL: + return int(value) + elif cursor_kind == CursorKind.FLOATING_LITERAL: + return float(value) + else: + return value if 
len(init_value) > 0: - init_value = init_value[0][1] + init_value = list(map(lambda i: cast_value(*i), init_value)) + if len(init_value) == 1: + init_value = init_value[0] return init_value def _get_var_decl_init_value(self, _ctype, children): @@ -403,6 +520,7 @@ def _get_var_decl_init_value_single(self, _ctype, child): log.debug('_get_var_decl_init_value_single: returns %s', str(init_value)) return init_value + @cached_pure_method() def _clean_string_literal(self, cursor, value): # strip wchar_t type prefix for string/character # indicatively: u8 for utf-8, u for utf-16, U for utf32 @@ -411,37 +529,44 @@ def _clean_string_literal(self, cursor, value): # string prefixes https://en.cppreference.com/w/cpp/language/string_literal # integer suffixes https://en.cppreference.com/w/cpp/language/integer_literal if cursor.kind in [CursorKind.CHARACTER_LITERAL, CursorKind.STRING_LITERAL]: - # clean prefix - value = re.sub(r'''^(L|u8|u|U)(R|"|')''', r'\2', value) - # R for raw strings - # we need to remove the raw-char-sequence prefix,suffix - if value[0] == 'R': - s = value[1:] - # if there is no '(' in the 17 first char, its not valid - offset = s[:17].index('(') - delimiter = s[1:offset] # we skip the " - value = s[offset + 1:-offset - 1] - return value - # we strip string delimiters - return value[1:-1] - elif cursor.kind == CursorKind.MACRO_INSTANTIATION: - # prefix = value[:3].split('"')[0] - return value + return from_c_string_literal(value) elif cursor.kind == CursorKind.MACRO_DEFINITION: - c = value[-1] - if c in ['"', "'"]: - value = re.sub('''^L%s''' % c , c, value) - else: - # unsigned int / long int / unsigned long int / long long int / unsigned long long int - # this works and doesn't eat STRING values because no '"' is before $ in the regexp. 
- # FIXME combinaisons of u/U, l/L, ll/LL, and combined, plus z/Z combined with u/U - value = re.sub("(u|U|l|L|ul|UL|ll|LL|ull|ULL|z|Z|zu|ZU)$", "", value) - return value + return process_c_literals(value) else: - pass - return value + return value + + @cached_pure_method() + def _macro_args_handling(self, tokens, call_args=False): + if tokens.current is None: + return tuple() + args = [] + if not tokens.consume_lit("("): + return None + balance = 0 + while tokens: + if balance == 0 and tokens.consume_lit(")"): + break + if tokens.consume_lit(","): + continue + if tokens.consume_lit("("): + balance += 1 + elif tokens.consume_lit(")"): + balance -= 1 + elif is_identifier(str(tokens.current.spelling)): + args.append(tokens.consume().spelling) + else: + if call_args: + args.append(tokens.consume().spelling) + else: + return None + return tuple(args) + + @cached_pure_method() + def _get_cursor_tokens(self, cursor): + return CursorTokens(cursor.get_tokens()) @log_entity + @cached_pure_method() def _literal_handling(self, cursor): """Parse all literal associated with this cursor. @@ -453,22 +578,25 @@ def _literal_handling(self, cursor): because some literal might need cleaning.""" # FIXME #77, internal integer literal like __clang_major__ are not working here. # tokens == [] , because ??? clang problem ? so there is no spelling available. 
- tokens = list(cursor.get_tokens()) + tokens = self._get_cursor_tokens(cursor) log.debug('literal has %d tokens.[ %s ]', len(tokens), ' '.join([str(t.spelling) for t in tokens])) - if len(tokens) == 1 and cursor.kind == CursorKind.STRING_LITERAL: - # use a shortcut that works for unicode - value = tokens[0].spelling - value = self._clean_string_literal(cursor, value) - return value - elif cursor.kind == CursorKind.STRING_LITERAL: - # use a shortcut - does not work on unicode var_decl - value = cursor.displayname - value = self._clean_string_literal(cursor, value) - return value + + if cursor.kind == CursorKind.STRING_LITERAL: + if len(tokens) == 1: + # use a shortcut that works for unicode + value = tokens[0].spelling + value = self._clean_string_literal(cursor, value) + return value + else: + # use a shortcut - does not work on unicode var_decl + value = cursor.displayname + value = self._clean_string_literal(cursor, value) + return value final_value = [] # code.interact(local=locals()) log.debug('cursor.type:%s', cursor.type.kind.name) - for i, token in enumerate(tokens): + while tokens: + token = tokens.current value = token.spelling log.debug('token:%s tk.kd:%11s tk.cursor.kd:%15s cursor.kd:%15s', token.spelling, token.kind.name, token.cursor.kind.name, @@ -477,11 +605,17 @@ def _literal_handling(self, cursor): # but only in specific case: ';' endl, or part of list_expr if (token.kind == TokenKind.PUNCTUATION and (token.cursor.kind == CursorKind.INVALID_FILE or - token.cursor.kind == CursorKind.INIT_LIST_EXPR)): + token.cursor.kind == CursorKind.INIT_LIST_EXPR)): log.debug('IGNORE token %s', value) + # consume token + tokens.consume() + token = tokens.current continue elif token.kind == TokenKind.COMMENT: log.debug('Ignore comment %s', value) + # consume token + tokens.consume() + token = tokens.current continue # elif token.cursor.kind == CursorKind.VAR_DECL: elif token.location not in cursor.extent: @@ -497,57 +631,127 @@ def _literal_handling(self, cursor): 
# Lexer is choking ? # FIXME BUG: token.location not in cursor.extent # code.interact(local=locals()) + # consume token + tokens.consume() + token = tokens.current continue # Cleanup specific c-lang or c++ prefix/suffix for POD types. if token.cursor.kind == CursorKind.INTEGER_LITERAL: # strip type suffix for constants - value = value.replace('L', '').replace('U', '') - value = value.replace('l', '').replace('u', '') - if value[:2] == '0x' or value[:2] == '0X': - value = '0x%s' % value[2:] # "int(%s,16)"%(value) - else: - value = int(value) + value = str(from_c_int_literal(value, self.parser.get_pointer_width())) + # consume token + tokens.consume() + token = tokens.current elif token.cursor.kind == CursorKind.FLOATING_LITERAL: # strip type suffix for constants - value = value.replace('f', '').replace('F', '') - value = float(value) - elif (token.cursor.kind == CursorKind.CHARACTER_LITERAL or - token.cursor.kind == CursorKind.STRING_LITERAL): + value = str(from_c_float_literal(value)) + # consume token + tokens.consume() + token = tokens.current + elif token.cursor.kind == CursorKind.CHARACTER_LITERAL: + value = self._clean_string_literal(token.cursor, value) + # consume token + tokens.consume() + token = tokens.current + elif token.cursor.kind == CursorKind.STRING_LITERAL: value = self._clean_string_literal(token.cursor, value) + # consume token + tokens.consume() + token = tokens.current elif token.cursor.kind == CursorKind.MACRO_INSTANTIATION: # get the macro value value = self.get_registered(value).body + # consume token + tokens.consume() + token = tokens.current # already cleaned value = self._clean_string_literal(token.cursor, value) elif token.cursor.kind == CursorKind.MACRO_DEFINITION: tk = token.kind - if i == 0: + if tokens.index == 0: # ignore, macro name - pass + # consume token + tokens.consume() + token = tokens.current elif token.kind == TokenKind.LITERAL: # and just clean it value = self._clean_string_literal(token.cursor, value) + # consume token 
+ tokens.consume() + token = tokens.current elif token.kind == TokenKind.IDENTIFIER: # log.debug("Ignored MACRO_DEFINITION token identifier : %s", value) # Identifier in Macro... Not sure what to do with that. if self.is_registered(value): # FIXME: if Macro is not a simple value replace, it should not be registered in the first place # parse that, try to see if there is another Macro in there. - value = self.get_registered(value).body - log.debug("Found MACRO_DEFINITION token identifier : %s", value) + if hasattr(self.get_registered(value), "body"): + macro = self.get_registered(value) + if contains_invalid_code(macro): + log.debug("MACRO_DEFINITION contains invalid code(s) : %s", value) + value = typedesc.UndefinedIdentifier(value) + # consume token + tokens.consume() + token = tokens.current + else: + log.debug("Found MACRO_DEFINITION token identifier : %s", value) + if macro.args: + tokens.consume() + token = tokens.current + call_args = self._macro_args_handling(tokens, call_args=True) + expansion_limit = None + if self.parser.advanced_macro: + expansion_limit = 1 + value = expand_macro_function( + macro, call_args, namespace=self.parser.interpreter_namespace, limit=expansion_limit) + token = tokens.current + else: + value = macro.body + # consume token + tokens.consume() + token = tokens.current + else: + value = typedesc.UndefinedIdentifier(value) + log.debug("Undefined MACRO_DEFINITION token identifier : %s", value) + # consume token + tokens.consume() + token = tokens.current else: value = typedesc.UndefinedIdentifier(value) log.debug("Undefined MACRO_DEFINITION token identifier : %s", value) - pass + # consume token + tokens.consume() + token = tokens.current elif token.kind == TokenKind.KEYWORD: log.debug("Got a MACRO_DEFINITION referencing a KEYWORD token.kind: %s", token.kind.name) value = typedesc.UndefinedIdentifier(value) - elif token.kind in [TokenKind.COMMENT, TokenKind.PUNCTUATION]: + # consume token + tokens.consume() + token = tokens.current + 
elif token.kind == TokenKind.PUNCTUATION: + # FIXME: handle PUNCTUATION # log.debug("Ignored MACRO_DEFINITION token.kind: %s", token.kind.name) - pass + # consume token + tokens.consume() + token = tokens.current + else: + log.warning("Unhandled token %s" % token.kind) + # consume token + tokens.consume() + token = tokens.current + elif token.kind == TokenKind.PUNCTUATION: + # consume token + tokens.consume() + token = tokens.current + else: + log.warning("Unhandled token %s" % token.kind) + # consume token + tokens.consume() + token = tokens.current # add token if value is not None: final_value.append(value) + # return the EXPR # code.interact(local=locals()) # FIXME, that will break. We need constant type return @@ -671,10 +875,10 @@ def _record_decl(self, cursor, _output_type, num=None): # save the type in the registry. Useful for not looping in case of # members with forward references obj = _output_type(name, align, None, bases, size, packed=False) - self.register(name, obj) self.set_location(obj, cursor) self.set_comment(obj, cursor) declared_instance = True + obj = self.register(name, obj) else: obj = self.get_registered(name) declared_instance = False @@ -688,6 +892,7 @@ def _record_decl(self, cursor, _output_type, num=None): for field in fields: log.debug('creating FIELD_DECL for %s/%s', field.kind.name, field.spelling) members.append(self.FIELD_DECL(field)) + obj.members = members # FIXME BUG clang: anonymous structure field with only one anonymous field # is not a FIELD_DECL. does not appear in get_fields() !!! # @@ -710,11 +915,10 @@ def _record_decl(self, cursor, _output_type, num=None): # by now, the type is registered. 
if not declared_instance: log.debug('_record_decl: %s was previously registered', name) - obj = self.get_registered(name) - obj.members = members # obj.packed = packed # final fixup self._fixup_record(obj) + obj = self.update_register(name, obj) return obj def _fixup_record_bitfields_type(self, s): @@ -1071,16 +1275,6 @@ def MACRO_DEFINITION(self, cursor): used with TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD. By default, macro are not parsed. requires -k m || parser.activate_macros_parsing() """ - # macro parsing takes a LOT of time. - # ignore system macro - if (not hasattr(cursor, 'location') or cursor.location is None or - cursor.location.file is None): - # keep track of sizes from clang directly - # but we already did that elsewhere in clangparser.py make_ctypes_convertor - # if cursor.displayname.startswith('__SIZEOF_'): - # typ = cursor.displayname[len('__SIZEOF_'):-2] - # self.__sizeof[typ] = list(cursor.get_tokens())[1].spelling - return False name = self.get_unique_name(cursor) # MACRO_DEFINITION are a list of Tokens # .kind = {IDENTIFIER, KEYWORD, LITERAL, PUNCTUATION, COMMENT ? } @@ -1092,30 +1286,53 @@ def MACRO_DEFINITION(self, cursor): # args should be filled when () are in tokens, args = None if isinstance(tokens, list): - # TODO, if there is an UndefinedIdentifier, we need to scrap the whole thing to comments. - # unknowns = [_ for _ in tokens if isinstance(_, typedesc.UndefinedIdentifier)] - # if len(unknowns) > 0: - # value = tokens - # elif len(tokens) == 2: if len(tokens) == 2: # #define key value value = tokens[1] elif len(tokens) == 3 and tokens[1] == '-': value = ''.join(tokens[1:]) elif tokens[1] == '(': - # TODO, differentiate between function-like macro and expression in () # function macro or an expression. 
- str_tokens = [str(_) for _ in tokens[1:tokens.index(')')+1]] - args = ''.join(str_tokens).replace(',', ', ') - str_tokens = [str(_) for _ in tokens[tokens.index(')')+1:]] - value = ''.join(str_tokens) - elif len(tokens) > 2: + tokens = remove_outermost_parentheses(tokens[1:]) + if tokens and tokens[0] == "(": + # function + str_tokens = "".join((map(str, tokens[0:tokens.index(')') + 1]))).strip() + str_tokens = remove_outermost_parentheses(str_tokens) + args = list(map( + lambda a: a.strip(), str_tokens.split(',') + )) + str_tokens = "".join((map(str, tokens[tokens.index(')') + 1:]))) + if all(map(lambda a: is_identifier(a) and a not in str_tokens, args)): + value = "".join(map(str, tokens)) + else: + value = str_tokens + + elif all(map(lambda a: a == "," or is_identifier(str(a)), tokens)): + # TODO FIX differentiation between function-like macro and expression in () + # no-op function ? + args = list(map( + lambda a: str(a).strip(), tokens + )) + value = "''" + else: + # expression + if not any(map(lambda t: isinstance(t, typedesc.UndefinedIdentifier), tokens)): + value = " ".join(map(str, tokens)) + else: + value = None + elif len(tokens) > 1: # #define key a b c - value = list(tokens[1:]) + if not any(map(lambda t: isinstance(t, typedesc.UndefinedIdentifier), tokens)): + value = " ".join(tokens[1:]) + else: + value = None else: # FIXME no reach ?! 
# just merge the list of tokens - value = ' '.join(tokens[1:]) + if not any(map(lambda t: isinstance(t, typedesc.UndefinedIdentifier), tokens)): + value = "".join(tokens[1:]) + else: + value = None elif isinstance(tokens, str): # #define only value = True @@ -1129,22 +1346,28 @@ def MACRO_DEFINITION(self, cursor): if name in ['NULL', '__thread'] or value in ['__null', '__thread']: value = None log.debug('MACRO: #define %s%s %s', name, args or '', value) - obj = typedesc.Macro(name, args, value) + func = None + if args: + func = process_macro_function(name, args, value) + if func is not None: + self.parser.interprete(func) + + obj = typedesc.Macro(name, args, value, func) + self.set_location(obj, cursor) + # set the comment in the obj + obj.comment = comment try: self.register(name, obj) except DuplicateDefinitionException: log.info('Redefinition of %s %s->%s', name, self.parser.all[name].args, value) # HACK self.parser.all[name] = obj - self.set_location(obj, cursor) - # set the comment in the obj - obj.comment = comment return True @log_entity def MACRO_INSTANTIATION(self, cursor): """We could use this to count instantiations - so we now, if we need to generate python code or comment for this macro ? """ + so we know, if we need to generate python code or comment for this macro ? 
""" log.debug('cursor.spelling: %s', cursor.spelling) # log.debug('cursor.kind: %s', cursor.kind.name) # log.debug('cursor.type.kind: %s', cursor.type.kind.name) diff --git a/ctypeslib/codegen/handler.py b/ctypeslib/codegen/handler.py index de30fb3..fb1135e 100644 --- a/ctypeslib/codegen/handler.py +++ b/ctypeslib/codegen/handler.py @@ -35,6 +35,7 @@ class DuplicateDefinitionException(KeyError): pass + ################################################################ class ClangHandler(object): @@ -56,6 +57,9 @@ def get_registered(self, name): def is_registered(self, name): return self.parser.is_registered(name) + def update_register(self, name, obj): + return self.parser.update_register(name, obj) + def remove_registered(self, name): return self.parser.remove_registered(name) diff --git a/ctypeslib/codegen/hash.py b/ctypeslib/codegen/hash.py new file mode 100644 index 0000000..f6f645e --- /dev/null +++ b/ctypeslib/codegen/hash.py @@ -0,0 +1,75 @@ +import collections.abc +import functools +import abc + + +class HashCombinable(abc.ABC): + @abc.abstractmethod + def hash_combine(self, combiner): + pass + + +def hash_value(value): + return HashCombiner().hash_value(value) + + +def hash_combine(args): + return HashCombiner().hash_combine(args) + + +class HashCombiner: + def __init__(self): + self._seen = set() + + def hash_value(self, value): + if isinstance(value, int): + return value + elif isinstance(value, str): + return hash(value) + elif isinstance(value, list): + value = tuple(value) + seen_key = id(value) + if seen_key in self._seen: + return 0 + self._seen.add(seen_key) + if isinstance(value, collections.abc.Mapping): + return self.hash_combine( + self.hash_value(k) ^ self.hash_value(v) + for k, v in sorted(value.items()) + ) + elif isinstance(value, collections.abc.Collection): + return self.hash_combine(value) + elif isinstance(value, HashCombinable): + return value.hash_combine(self) + else: + return hash(value) + + def hash_combine(self, args): + return 
functools.reduce( + lambda h, v: h ^ self.hash_value(v), + filter(lambda a: id(a) not in self._seen, args), + 0, + ) + + +class hashable_dict(HashCombinable, collections.abc.Mapping): + def __init__(self, mapping): + self._mapping = mapping + self._hash = None + + def hash_combine(self, combiner): + return combiner.hash_value(self._mapping) + + def __hash__(self): + if self._hash is None: + self._hash = self.hash_combine(HashCombiner()) + return self._hash + + def __getitem__(self, key): + return self._mapping[key] + + def __iter__(self): + return iter(self._mapping) + + def __len__(self): + return len(self._mapping) diff --git a/ctypeslib/codegen/preprocess.py b/ctypeslib/codegen/preprocess.py new file mode 100644 index 0000000..3562099 --- /dev/null +++ b/ctypeslib/codegen/preprocess.py @@ -0,0 +1,442 @@ +import textwrap +import itertools +import logging +import re +import ctypes +from collections.abc import Iterable + + +log = logging.getLogger(__name__) + +_c_hexint_literal = r"\b0x[0-9a-fA-F]+" +_c_hexint_literal_regex = re.compile(f"({_c_hexint_literal})(([uU])?([lL])?([lL])?)") +_c_octint_literal = r"\b0[0-7]*" +_c_octint_literal_regex = re.compile(f"({_c_octint_literal})(([uU])?([lL])?([lL])?)") +_c_decint_literal = r"\b[1-9][0-9]*" +_c_decint_literal_regex = re.compile(f"({_c_decint_literal})(([uU])?([lL])?([lL])?)") +_c_int_literal = f"((?:{_c_hexint_literal})|(?:{_c_octint_literal})|(?:{_c_decint_literal}))[uUlL]?[lL]?" +_c_int_literal_regex = re.compile(_c_int_literal) + +_c_decimal_literal = r"((?:(? 
_int_limits(ctypes.c_int64)[1]: + _type = ctypes.c_uint64 + else: + _type = ctypes.c_int64 + return _type(value).value + + +def process_c_int_literal(value, pointer_width=64): + value = _process_c_literal( + _c_hexint_literal_regex, + value, + repl=lambda m: str(_process_c_int_matched_literal(m, pointer_width, 16)), + ) + value = _process_c_literal( + _c_octint_literal_regex, + value, + repl=lambda m: str(_process_c_int_matched_literal(m, pointer_width, 8)), + ) + value = _process_c_literal( + _c_decint_literal_regex, + value, + repl=lambda m: str(_process_c_int_matched_literal(m, pointer_width, 10)), + ) + return value + + +def process_c_float_literal(value): + return _process_c_literal(_c_decimal_literal_regex, value) + + +def from_c_literal(value): + ret = from_c_int_literal(value) + if ret is not None: + return ret + ret = from_c_float_literal(value) + if ret is not None: + return ret + return value + + +_c_string_literal = r"(?:L|u8|u|U)?(R)?(\"|')((?:(?!\2).)*)\2" +_c_string_literal_regex = re.compile(_c_string_literal) +_c_raw_string_literal = r"([^()\\s]{0,16})\(((?:(?!\1).)*)\)\1?" 
+_c_raw_string_literal_regex = re.compile(_c_raw_string_literal) + + +def _process_c_string_literal(match, quote=False): + raw_string = match.group(1) is not None + value = match.group(3) + if raw_string: + value = _c_raw_string_literal_regex.sub(r"\2", value) + if not quote: + return value + else: + return f'"{value}"' + + +def from_c_string_literal(value): + return _c_string_literal_regex.sub(_process_c_string_literal, value) + + +def process_c_string_literals(value): + return _c_string_literal_regex.sub( + lambda m: _process_c_string_literal(m, quote=True), value + ) + + +def process_c_literals(value, pointer_width=64): + value = process_c_int_literal(value, pointer_width=64) + value = process_c_float_literal(value) + value = process_c_string_literals(value) + return value + + +_macro_operators = [ + r"##", + r"#", + r"\(", + r"\)", + r"{", + r"}", + r",", + r"!=", + r"\|=", + r"&=", + r"\+=", + r"\*=", + r"/=", + r"%=", + r"\+", + r"-", + r"\*", + r"/", + r">>", + r"<<", + r">>=", + r"<<=", + r":", + r"\?", + r">=", + r"<=", + r"<", + r">", + r"==", + r"=", + r"\"", + r"'", + r"&&", + r"&", + r"\|\|", + r"\|", +] + +_macro_operators_regexes = list(map(lambda r: re.compile(r), _macro_operators)) + +_macro_identifier = r"([_a-zA-Z][_a-zA-Z0-9]*)" +_macro_identifier_regex = re.compile("^" + _macro_identifier + "$") + + +def is_identifier(expr): + return _macro_identifier_regex.match(expr) is not None + + +_macro_tokens = "|".join( + [_c_numeric_literal, _macro_identifier] + + list(map(lambda t: f"({t})", _macro_operators)) +) + +_macro_tokenizer = re.compile(_macro_tokens) + +_builtins_map = { + r"const": "", + r"typedef": "", + r"sizeof": "ctypes.sizeof", + r"bool": "ctypes.c_bool", + r"unsigned\s+char": "ctypes.c_ubyte", + r"char": "ctypes.c_byte", + r"wchar": "ctypes.c_wchar", + r"unsigned\s+short\s+int": "ctypes.c_ushort", + r"unsigned\s+short": "ctypes.c_ushort", + r"short\s+int": "ctypes.c_short", + r"short": "ctypes.c_short", + r"unsigned\s+int": 
"ctypes.c_uint", + r"int": "ctypes.c_int", + r"unsigned\s+long\s+long\s+int": "ctypes.c_ulonglong", + r"unsigned\s+long\s+int": "ctypes.c_ulong", + r"unsigned\s+long\s+long": "ctypes.c_ulonglong", + r"unsigned\s+long": "ctypes.c_ulong", + r"long\s+long\s+int": "ctypes.c_longlong", + r"long\s+long": "ctypes.c_longlong", + r"long\s+int": "ctypes.c_long", + r"long": "ctypes.c_long", + r"size_t": "ctypes.c_size_t", + r"ssize_t": "ctypes.c_ssize_t", + r"float": "ctypes.c_float", + r"long\s+double": "ctypes.c_longdouble", + r"double": "ctypes.c_double", + r"typeof": "type", +} +_builtins_patterns = re.compile( + "|".join(map(lambda p: f"(\\b{p}\\b)", _builtins_map.keys())) +) + + +def replace_builtins(expr): + return _builtins_patterns.sub(lambda m: _builtins_map.get(m.group(1)), expr) + + +_curly_brace_escape_map = { + "{": "{{", + "}": "}}", +} +_curly_brace_escape_patterns = re.compile( + "|".join(map(lambda p: f"({p})", _curly_brace_escape_map.keys())) +) + + +def escape_curly_brace(expr): + return _curly_brace_escape_patterns.sub( + lambda m: _curly_brace_escape_map.get(m.group(0)), expr + ) + + +_escape_quotes_regex = re.compile('(?", method) + if namespace is None: + namespace = {} + return eval(processed_macro, globals(), namespace) + + +def eval_processed_macro(processed_macro, namespace=None, pointer_width=64): + return _eval_processed_macro(processed_macro, namespace, pointer_width, "eval") + + +def exec_processed_macro(processed_macro, namespace=None, pointer_width=64): + return _eval_processed_macro(processed_macro, namespace, pointer_width, "exec") + + +def remove_outermost_parentheses(macro): + if len(macro) < 2: + return macro + if not macro[0] == "(" or not macro[-1] == ")": + return macro + balance = 0 + ret = macro[1:-1] + for c in ret: + if c == "(": + balance += 1 + elif c == ")": + balance -= 1 + if balance < 0: + return macro + if balance != 0: + return macro + else: + return remove_outermost_parentheses(ret) + + +def __attribute__(expr): + return 
+ + +def __const__(expr): + return + + +def __restrict(expr): + return diff --git a/ctypeslib/codegen/typedesc.py b/ctypeslib/codegen/typedesc.py index ca2d191..f57eadc 100644 --- a/ctypeslib/codegen/typedesc.py +++ b/ctypeslib/codegen/typedesc.py @@ -1,20 +1,36 @@ # typedesc.py - classes representing C type descriptions +from ctypeslib.codegen.hash import HashCombinable, HashCombiner -class T(object): + +class T(HashCombinable): + _clang_hash_ = None name = None location = None comment = None + def _attrs_hash(self, combiner): + return combiner.hash_combine( + (v for k, v in vars(self).items() if not (k.startswith("_") or callable(v))) + ) + + def hash_combine(self, combiner): + return self._attrs_hash(combiner) ^ combiner.hash_value(self.__class__.__name__) + + def __hash__(self): + return self.hash_combine(HashCombiner()) + def __repr__(self): kv = self.__dict__ s = [] for k, v in kv.items(): if isinstance(v, T): - s.append('%s=%s(...)' % (k, v.__class__.__name__)) + s.append(f"{k}={v.__class__.__name__}(...)") else: - s.append('%s=%s' % (k, v)) - return "%s(%s)" % (self.__class__.__name__, ','.join(s)) + s.append(f"{k}={v}") + name = self.__class__.__name__ + attrs = ",".join(s) + return f"{name}({attrs})" class Argument(T): @@ -31,12 +47,12 @@ class _HasArgs(T): """Any C type with arguments""" def __init__(self): - self.arguments = [] + self.arguments = tuple() def add_argument(self, arg): if not isinstance(arg, Argument): - raise TypeError('Argument expected, %s instead' % (type(arg))) - self.arguments.append(arg) + raise TypeError("Argument expected, %s instead" % (type(arg))) + self.arguments += (arg,) def iterArgTypes(self): for a in self.arguments: @@ -49,13 +65,14 @@ def iterArgNames(self): def fixup_argtypes(self, cb): # for a in self.arguments: # getattr(cb, a.a.atype = typemap[a.atype] - #import code + # import code # code.interact(local=locals()) pass ################ + class Alias(T): """a C preprocessor alias, like #define A B""" @@ -70,25 +87,24 
@@ class Macro(T): """a C preprocessor definition with arguments""" - def __init__(self, name, args, body): + def __init__(self, name, args, body, func): """all arguments are strings, args is the literal argument list *with* the parens around it: Example: Macro("CD_INDRIVE", "(status)", "((int)status > 0)")""" self.name = name self.args = args self.body = body + self.func = func class File(T): - def __init__(self, name): self.name = name class Function(_HasArgs): - def __init__(self, name, returns, attributes, extern): - _HasArgs.__init__(self) + super().__init__() self.name = name self.returns = returns self.attributes = attributes # dllimport, __stdcall__, __cdecl__ @@ -96,40 +112,36 @@ def __init__(self, name, returns, attributes, extern): class Ignored(_HasArgs): - def __init__(self, name): - _HasArgs.__init__(self) + super().__init__() self.name = name class OperatorFunction(_HasArgs): - def __init__(self, name, returns): - _HasArgs.__init__(self) + super().__init__() self.name = name self.returns = returns class FunctionType(_HasArgs): - - def __init__(self, returns, attributes, name=''): - _HasArgs.__init__(self) + def __init__(self, returns, attributes, name=""): + super().__init__() self.returns = returns self.attributes = attributes self.name = "FP_%s" % (name) class Method(_HasArgs): - def __init__(self, name, returns): - _HasArgs.__init__(self) + super().__init__() self.name = name self.returns = returns class FundamentalType(T): - def __init__(self, name, size, align): + super().__init__() self.name = name if name != "void": self.size = int(size) @@ -137,8 +149,8 @@ def __init__(self, name, size, align): class PointerType(T): - def __init__(self, typ, size, align): + super().__init__() self.typ = typ self.size = int(size) self.align = int(align) @@ -146,23 +158,23 @@ def __init__(self, typ, size, align): class Typedef(T): - def __init__(self, name, typ): + super().__init__() self.name = name self.typ = typ class ArrayType(T): - def __init__(self, typ, 
size): + super().__init__() self.typ = typ self.size = size self.name = "array_%s" % typ.name class StructureHead(T): - def __init__(self, struct): + super().__init__() self.struct = struct @property @@ -171,8 +183,8 @@ def name(self): class StructureBody(T): - def __init__(self, struct): + super().__init__() self.struct = struct @property @@ -181,18 +193,28 @@ def name(self): class _Struct_Union_Base(T): - def get_body(self): return self.struct_body def get_head(self): return self.struct_head + def _attrs_hash(self, combiner): + # prevent infinite recursion with self.struct_head/self.struct_body + return combiner.hash_combine( + ( + v + for k, v in vars(self).items() + if not (k.startswith("_") or k.startswith("struct_") or callable(v)) + ) + ) -class Structure(_Struct_Union_Base): - def __init__(self, name, align, members, bases, size, artificial=None, - packed=False): +class Structure(_Struct_Union_Base): + def __init__( + self, name, align, members, bases, size, artificial=None, packed=False + ): + super().__init__() self.name = name self.align = int(align) self.members = members @@ -205,9 +227,10 @@ def __init__(self, name, align, members, bases, size, artificial=None, class Union(_Struct_Union_Base): - - def __init__(self, name, align, members, bases, size, artificial=None, - packed=False): + def __init__( + self, name, align, members, bases, size, artificial=None, packed=False + ): + super().__init__() self.name = name self.align = int(align) self.members = members @@ -224,10 +247,19 @@ def __init__(self, name, align, members, bases, size, artificial=None, class Field(T): - ''' Change bits if its a bitfield''' - - def __init__(self, name, typ, offset, bits, is_bitfield=False, - is_anonymous=False, is_padding=False): + """ Change bits if its a bitfield""" + + def __init__( + self, + name, + typ, + offset, + bits, + is_bitfield=False, + is_anonymous=False, + is_padding=False, + ): + super().__init__() self.name = name self.type = typ self.offset = offset @@ 
-238,17 +270,17 @@ def __init__(self, name, typ, offset, bits, is_bitfield=False, class CvQualifiedType(T): - def __init__(self, typ, const, volatile): + super().__init__() self.typ = typ self.const = const self.volatile = volatile - self.name = 'CV_QUAL_%s' % (self.typ.name) + self.name = "CV_QUAL_%s" % (self.typ.name) class Enumeration(T): - def __init__(self, name, size, align): + super().__init__() self.name = name self.size = int(size) self.align = int(align) @@ -259,31 +291,50 @@ def add_value(self, v): class EnumValue(T): - def __init__(self, name, value, enumeration): + super().__init__() self.name = name self.value = value self.enumeration = enumeration + def _attrs_hash(self, combiner): + # prevent infinite recursion self.enumeration + return combiner.hash_combine((self.name, self.value)) -class Variable(T): +class Variable(T): def __init__(self, name, typ, init=None, extern=False): + super().__init__() self.name = name self.typ = typ self.init = init self.extern = extern + ################################################################ -class UndefinedIdentifier(T): +class InvalidGeneratedCode(T): + pass + + +class UndefinedIdentifier(InvalidGeneratedCode): def __init__(self, name): + super().__init__() self.name = name def __str__(self): return self.name +class InvalidGeneratedMacro(InvalidGeneratedCode): + def __init__(self, code): + super().__init__() + self.code = code + + def __str__(self): + return self.code + + def is_record(t): - return isinstance(t, Structure) or isinstance(t, Union) \ No newline at end of file + return isinstance(t, Structure) or isinstance(t, Union) diff --git a/ctypeslib/codegen/typehandler.py b/ctypeslib/codegen/typehandler.py index 2320a6a..fe116bb 100644 --- a/ctypeslib/codegen/typehandler.py +++ b/ctypeslib/codegen/typehandler.py @@ -3,6 +3,7 @@ from clang.cindex import TypeKind from ctypeslib.codegen import typedesc +from ctypeslib.codegen.cache import cached_pure_method from ctypeslib.codegen.util import log_entity 
from ctypeslib.codegen.handler import ClangHandler from ctypeslib.codegen.handler import InvalidDefinitionError @@ -34,6 +35,7 @@ def init_fundamental_types(self): setattr(self, TypeKind.from_id(_id).name, self._handle_fundamental_types) + @cached_pure_method() def _handle_fundamental_types(self, typ): """ Handles POD types nodes. @@ -70,6 +72,7 @@ def _handle_fundamental_types(self, typ): # not listed has node in the AST. # not very useful in python anyway. + @cached_pure_method() @log_entity def TYPEDEF(self, _cursor_type): """ @@ -84,8 +87,9 @@ def TYPEDEF(self, _cursor_type): obj = self.parse_cursor(_decl) return obj + @cached_pure_method() @log_entity - def ENUM(self, _cursor_type): + def ENUM(self, _cursor_type): """ Handles ENUM typedef. """ @@ -98,6 +102,7 @@ def ENUM(self, _cursor_type): obj = self.parse_cursor(_decl) return obj + @cached_pure_method() @log_entity def ELABORATED(self, _cursor_type): """ @@ -112,6 +117,8 @@ def ELABORATED(self, _cursor_type): obj = self.parse_cursor(_decl) return obj + # FIXME: this can't be cached? + #@cached_pure_method() @log_entity def POINTER(self, _cursor_type): """ @@ -164,6 +171,8 @@ def POINTER(self, _cursor_type): obj.comment = comment return obj + # FIXME: this can't be cached? 
+ #@cached_pure_method() @log_entity def _array_handler(self, _cursor_type): """ @@ -205,6 +214,7 @@ def _array_handler(self, _cursor_type): VARIABLEARRAY = _array_handler DEPENDENTSIZEDARRAY = _array_handler + @cached_pure_method() @log_entity def FUNCTIONPROTO(self, _cursor_type): """Handles function prototype.""" @@ -223,6 +233,7 @@ def FUNCTIONPROTO(self, _cursor_type): self.set_location(obj, None) return obj + @cached_pure_method() @log_entity def FUNCTIONNOPROTO(self, _cursor_type): """Handles function with no prototype.""" @@ -238,6 +249,7 @@ def FUNCTIONNOPROTO(self, _cursor_type): # structures, unions, classes + @cached_pure_method() @log_entity def RECORD(self, _cursor_type): """ @@ -257,6 +269,7 @@ def RECORD(self, _cursor_type): obj = self.parse_cursor(_decl) return obj + @cached_pure_method() @log_entity def UNEXPOSED(self, _cursor_type): """ diff --git a/ctypeslib/codegen/util.py b/ctypeslib/codegen/util.py index 6f0c025..fb7f5a1 100644 --- a/ctypeslib/codegen/util.py +++ b/ctypeslib/codegen/util.py @@ -2,19 +2,22 @@ # This file provides common utility functions for the test suite. # -from clang.cindex import Cursor -from clang.cindex import TranslationUnit +from ctypeslib.codegen.cindex import Cursor +from ctypeslib.codegen.cindex import TranslationUnit +from ctypeslib.codegen.cindex import Type from collections.abc import Iterable import logging -import re +import sys +import textwrap from ctypeslib.codegen import typedesc +from ctypeslib.codegen.preprocess import eval_processed_macro -log = logging.getLogger('utils') +log = logging.getLogger("utils") -def get_tu(source, lang='c', all_warnings=False, flags=None): +def get_tu(source, lang="c", all_warnings=False, flags=None): """Obtain a translation unit from source and language. By default, the translation unit is created from source file "t." @@ -26,17 +29,17 @@ def get_tu(source, lang='c', all_warnings=False, flags=None): all_warnings is a convenience argument to enable all compiler warnings. 
""" args = list(flags or []) - name = 'memory_input.c' - if lang == 'cpp': - name = 'memory_input.cpp' - args.append('-std=c++11') - elif lang == 'objc': - name = 'memory_input.m' - elif lang != 'c': - raise Exception('Unknown language: %s' % lang) + name = "memory_input.c" + if lang == "cpp": + name = "memory_input.cpp" + args.append("-std=c++11") + elif lang == "objc": + name = "memory_input.m" + elif lang != "c": + raise Exception("Unknown language: %s" % lang) if all_warnings: - args += ['-Wall', '-Wextra'] + args += ["-Wall", "-Wextra"] return TranslationUnit.from_source(name, args, unsaved_files=[(name, source)]) @@ -65,7 +68,6 @@ def get_cursor(source, spelling): result = get_cursor(cursor, spelling) if result is not None: return result - return None @@ -103,6 +105,7 @@ def new_decorator(f): g.__doc__ = f.__doc__ g.__dict__.update(f.__dict__) return g + new_decorator.__name__ = dec.__name__ new_decorator.__doc__ = dec.__doc__ new_decorator.__dict__.update(dec.__dict__) @@ -112,19 +115,20 @@ def new_decorator(f): @decorator def log_entity(func): def fn(*args, **kwargs): - name = args[0].get_unique_name(args[1]) - if name == '': - parent = args[1].semantic_parent + cursor = next(arg for arg in args if isinstance(arg, (Type, Cursor))) + name = args[0].get_unique_name(cursor) + if name == "": + parent = cursor.semantic_parent if parent: - name = 'child of %s' % parent.displayname - log.debug("%s: displayname:'%s'",func.__name__, name) + name = "child of %s" % parent.displayname + log.debug("%s: displayname:'%s'", func.__name__, name) # print 'calling {}'.format(func.__name__) return func(*args, **kwargs) + return fn class ADict(dict): - def __getattr__(self, name): try: return self[name] @@ -132,40 +136,44 @@ def __getattr__(self, name): raise AttributeError(name) -_c_literal_regex = re.compile( - r"^([+-]?((\d+(e|E)[+-]?\d+)|(\d+(\.\d*)?((e|E)[+-]?\d+)?)|(\.\d+((e|E)[+-]?\d+)?)))(f|F|l|L)?$" -) - - -def from_c_float_literal(value): - if (not isinstance(value, 
str) and - isinstance(value, Iterable) and - all(map(lambda v: isinstance(v, str), value))): - value = "".join(value) - if not isinstance(value, str): - return None - match = _c_literal_regex.match(value) - if not match: - return None - return match.group(1) - - -def contains_undefined_identifier(macro): +def expand_macro_function(macro, args, namespace=None, limit=None, max_recursion=None): + args = ", ".join(args) + code = f"{macro.name}({args})" + if max_recursion is None: + max_recursion = sys.getrecursionlimit() + max_eval = limit or max_recursion + try: + prev = eval_processed_macro(code, namespace=namespace) + for i in range(1, max_eval + 1): + if limit is not None and limit == i: + return prev + value = eval_processed_macro(str(prev), namespace=namespace) + if prev == value: + return value + prev = value + raise RecursionError( + f"maximum recursion depth exceeded in {macro.name} expansion" + ) + except (SyntaxError, NameError): + return typedesc.InvalidGeneratedMacro(code) + + +def contains_invalid_code(macro): # body is undefined - if isinstance(macro.body, typedesc.UndefinedIdentifier): + if isinstance(macro.body, typedesc.InvalidGeneratedCode): return True - def _list_contains_undefined_identifier(l): + def _list_contains_invalid_code(l): for b in l: - if isinstance(b, typedesc.UndefinedIdentifier): + if isinstance(b, typedesc.InvalidGeneratedCode): return True - if isinstance(b, list) and _list_contains_undefined_identifier(b): + if isinstance(b, list) and _list_contains_invalid_code(b): return True return False # or one item is undefined if isinstance(macro.body, list): - if _list_contains_undefined_identifier(macro.body): + if _list_contains_invalid_code(macro.body): return True return False @@ -191,8 +199,10 @@ def body_is_all_string_tokens(macro_body): __all__ = [ - 'get_cursor', - 'get_cursors', - 'get_tu', - 'from_c_float_literal', + "get_cursor", + "get_cursors", + "get_tu", + "from_c_float_literal", + "remove_outermost_parentheses", + 
"replace_builtins", ] diff --git a/requirements.txt b/requirements.txt index a905ec4..31a0af7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ -clang>=7.0 \ No newline at end of file +clang>=7.0 +packaging diff --git a/setup.py b/setup.py index fb2a074..04c73a2 100755 --- a/setup.py +++ b/setup.py @@ -36,6 +36,7 @@ ]}, test_suite="test.alltests", install_requires=[ - 'clang>=11', + 'clang>=11', + 'packaging', ] ) diff --git a/test/conftest.py b/test/conftest.py new file mode 100644 index 0000000..4802612 --- /dev/null +++ b/test/conftest.py @@ -0,0 +1,26 @@ +from ctypeslib.codegen.cindex import Config +from pathlib import Path +import os +import pytest + + +def pytest_addoption(parser): + parser.addoption('--libclang-library', default=None) + parser.addoption('--libclang-include-dir', default=None) + + +@pytest.fixture(scope="session", autouse=True) +def libclang_config(pytestconfig, request): + libclang_library = pytestconfig.getoption("libclang_library") + libclang_include_dir = pytestconfig.getoption("libclang_include_dir") + if libclang_library: + Config.set_library_file(libclang_library) + if libclang_include_dir: + Config.set_include_dir(libclang_include_dir) + + +@pytest.fixture(scope="class", autouse=True) +def change_test_dir(request): + os.chdir(str(Path(request.fspath).parent.parent)) + yield + os.chdir(request.config.invocation_dir) diff --git a/test/test_api.py b/test/test_api.py index a128c3a..03606eb 100644 --- a/test/test_api.py +++ b/test/test_api.py @@ -3,6 +3,8 @@ import ctypeslib +from test.util import main + class ApiTest(unittest.TestCase): def test_basic_use_string(self): @@ -56,4 +58,4 @@ def test_basic_use_io(self): if __name__ == '__main__': - unittest.main() + main() diff --git a/test/test_bitfield.py b/test/test_bitfield.py index cb0b565..8169ec6 100644 --- a/test/test_bitfield.py +++ b/test/test_bitfield.py @@ -3,7 +3,7 @@ import logging import sys -from test.util import ClangTest +from test.util import ClangTest, main 
class RecordTest(ClangTest): @@ -88,4 +88,4 @@ def p(s): if __name__ == "__main__": logging.basicConfig(level=logging.INFO) # logging.getLogger('codegen').setLevel(logging.INFO) - unittest.main(verbosity=2) + main(verbosity=2) diff --git a/test/test_callbacks.py b/test/test_callbacks.py index feb67d8..aa1c0d0 100644 --- a/test/test_callbacks.py +++ b/test/test_callbacks.py @@ -2,7 +2,7 @@ import ctypes import logging -from test.util import ClangTest +from test.util import ClangTest, main class Callback(ClangTest): @@ -47,4 +47,4 @@ def test_callbacks(self): if __name__ == "__main__": # logging.basicConfig(level=logging.INFO) - unittest.main(verbosity=2) + main(verbosity=2) diff --git a/test/test_clang2py.py b/test/test_clang2py.py index 6cf2ea6..5a0e9df 100644 --- a/test/test_clang2py.py +++ b/test/test_clang2py.py @@ -1,28 +1,59 @@ +import os import subprocess import sys import unittest -from test.util import ClangTest +from pathlib import Path +from test.util import ClangTest, main import ctypeslib +from io import StringIO +from unittest import mock + + +def run(args, env): + p = subprocess.run( + args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env) + output, stderr = p.stdout.decode(), p.stderr.decode() + return p, output, stderr + + +clang2py_path = None +python_path = None +libclang_library = None +libclang_include_dir = None +use_pytest = False + + +try: + import pytest + @pytest.fixture(scope="module", autouse=True) + def _clang2py_path(request): + global python_path + python_path = Path(request.fspath).parent.parent -def run(args): - if hasattr(subprocess, 'run'): - p = subprocess.run(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - output, stderr = p.stdout.decode(), p.stderr.decode() - return p, output, stderr - else: - p = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=-1) - output, stderr = p.communicate() - return p, output, stderr + 
@pytest.fixture(scope="session", autouse=True) + def libclang_config(pytestconfig, request): + global libclang_library + global libclang_include_dir + libclang_library = pytestconfig.getoption("libclang_library") + libclang_include_dir = pytestconfig.getoption("libclang_include_dir") + use_pytest = True -__, clang2py_path, __ = run(['which', 'clang2py']) -clang2py_path = clang2py_path.strip() +except ImportError: + python_path = Path(__file__).parent.parent + +clang2py_path = Path(__file__).parent.parent / "ctypeslib" / "clang2py.py" def clang2py(args): - return run([sys.executable, clang2py_path] + args) + global libclang_include_dir + env = os.environ.copy() + env["PYTHONPATH"] = f"{python_path}:{env['PYTHONPATH']}" + if libclang_include_dir: + args += [f'--clang-args=-isystem{libclang_include_dir}'] + return run([sys.executable, clang2py_path] + args, env=env) class InputOutput(ClangTest): @@ -57,7 +88,7 @@ def test_no_files(self): def test_multiple_source_files(self): """run clang2py -i test/data/test-basic-types.c test/data/test-bitfield.c""" - p, output, stderr = run(['clang2py', '-i', 'test/data/test-basic-types.c', 'test/data/test-bitfield.c']) + p, output, stderr = clang2py(['-i', 'test/data/test-basic-types.c', 'test/data/test-bitfield.c']) self.assertEqual(0, p.returncode) self.assertIn("WORD_SIZE is:", output) self.assertIn("_long = ", output) @@ -101,6 +132,14 @@ def test_helper(self): class ArgumentTypeKind(ClangTest): + def setUp(self): + # We need to generate macro (including function-like macro) + # This used to take a long time to process but some performance + # improvements have been implemented and I am not sure if it's + # still the case for common workloads. (See: codegen.cache). 
+ self.full_parsing_options = True + self.advanced_macro = True + @unittest.skip('find a good test for aliases') def test_alias(self): """run clang2py -k a test/data/test-stdint.cpp""" @@ -211,66 +250,71 @@ def test_debug(self): self.assertIn("# Total symbols:", stderr) -from io import StringIO -from unittest.mock import patch - +if use_pytest: -class ModuleTesting(ClangTest): - def test_version(self): - """run clang2py -v""" - from ctypeslib import clang2py - with patch('sys.stdout', new=StringIO()) as fake_out: + class ModuleTesting(ClangTest): + def test_version(self): + """run clang2py -v""" + from ctypeslib import clang2py with self.assertRaises(SystemExit): clang2py.main(['--version']) - self.assertIn(str(ctypeslib.__version__), fake_out.getvalue()) + captured = self.capfd.readouterr() + self.assertIn(str(ctypeslib.__version__), captured.out) - def test_arg_file(self): - """run clang2py test/data/test-basic-types.c""" - from ctypeslib import clang2py - with patch('sys.stdout', new=StringIO()) as fake_out: + def test_arg_file(self): + """run clang2py test/data/test-basic-types.c""" + from ctypeslib import clang2py clang2py.main(['test/data/test-basic-types.c']) - self.assertIn("_int = ctypes.c_int", fake_out.getvalue()) - - def test_arg_input_stdin(self): - """run echo | clang2py - """ - from ctypeslib import clang2py - with patch('sys.stdin', StringIO('int i = 0;')) as stdin, patch('sys.stdout', new=StringIO()) as fake_out: - clang2py.main(['-']) - self.assertIn("__all__ =", fake_out.getvalue()) - self.assertIn("# TARGET arch is: []", fake_out.getvalue()) - - def test_arg_debug(self): - """run clang2py --debug test/data/test-basic-types.c""" - from ctypeslib import clang2py - with patch('sys.stdout', new=StringIO()) as fake_out, patch('sys.stderr', new=StringIO()) as fake_err: + captured = self.capfd.readouterr() + self.assertIn("_int = ctypes.c_int", captured.out) + + def test_arg_input_stdin(self): + """run echo | clang2py - """ + from ctypeslib import 
clang2py + with mock.patch('sys.stdin', StringIO('int i = 0;')) as stdin: + clang2py.main(['-']) + captured = self.capfd.readouterr() + self.assertIn("__all__ =", captured.out) + self.assertIn("# TARGET arch is:", captured.out) + + @unittest.skip('stderr capturing fails for some unknown reason...') + def test_arg_debug(self): + """run clang2py --debug test/data/test-basic-types.c""" + from ctypeslib import clang2py clang2py.main(['--debug', 'test/data/test-basic-types.c']) - self.assertIn("_int = ctypes.c_int", fake_out.getvalue()) - self.assertIn("DEBUG:clangparser:ARCH sizes:", fake_err.getvalue()) - self.assertNotIn("ERROR", fake_err.getvalue()) - - def test_arg_target(self): - """run clang2py --target x86_64-Linux test/data/test-basic-types.c """ - from ctypeslib import clang2py - with patch('sys.stdout', new=StringIO()) as fake_out: + captured = self.capfd.readouterr() + self.assertIn("_int = ctypes.c_int", captured.out) + self.assertIn("DEBUG:clangparser:ARCH sizes:", captured.err) + self.assertNotIn("ERROR", captured.err) + + def test_arg_target(self): + """run clang2py --target x86_64-Linux test/data/test-basic-types.c """ + from ctypeslib import clang2py clang2py.main(['--target', 'x86_64-Linux', 'test/data/test-basic-types.c']) - self.assertIn("# TARGET arch is: ['-target', 'x86_64-Linux']", fake_out.getvalue()) - self.assertIn("_int = ctypes.c_int", fake_out.getvalue()) - self.assertIn("_long = ctypes.c_int64", fake_out.getvalue()) + captured = self.capfd.readouterr() + self.assertIn("# TARGET arch is: x86_64-Linux", captured.out) + self.assertIn("_int = ctypes.c_int", captured.out) + self.assertIn("_long = ctypes.c_int64", captured.out) clang2py.main(['--target', 'i586-Linux', 'test/data/test-basic-types.c']) - self.assertIn("# TARGET arch is: ['-target', 'i586-Linux']", fake_out.getvalue()) - self.assertIn("_int = ctypes.c_int", fake_out.getvalue()) - self.assertIn("_long = ctypes.c_int32", fake_out.getvalue()) - - # TODO - @unittest.skip - def 
test_arg_clang_args(self): - """run clang2py test/data/test-basic-types.c --clang-args="-DDEBUG=2" """ - from ctypeslib import clang2py - with patch('sys.stdin', StringIO('int i = DEBUG;')) as stdin, patch('sys.stdout', new=StringIO()) as fake_out: + captured = self.capfd.readouterr() + self.assertIn("# TARGET arch is: i586-Linux", captured.out) + self.assertIn("_int = ctypes.c_int", captured.out) + self.assertIn("_long = ctypes.c_int32", captured.out) + + # TODO + @unittest.skip + def test_arg_clang_args(self): + """run clang2py test/data/test-basic-types.c --clang-args="-DDEBUG=2" """ + from ctypeslib import clang2py clang2py.main(['', '--clang-args="-DDEBUG=2"', '-']) - self.assertIn("# TARGET arch is: []", fake_out.getvalue()) - self.assertIn("i = 2", fake_out.getvalue()) + captured = self.capfd.readouterr() + self.assertIn("# TARGET arch is:", captured.out) + self.assertIn("i = 2", captured.out) + + @pytest.fixture(autouse=True) + def capfd(self, capfd): + self.capfd = capfd class OrderingTest(ClangTest): @@ -298,4 +342,4 @@ def test_enum_struct(self): if __name__ == "__main__": - unittest.main() + main() diff --git a/test/test_clangparser.py b/test/test_clangparser.py index b709fb9..18ce15a 100644 --- a/test/test_clangparser.py +++ b/test/test_clangparser.py @@ -1,6 +1,6 @@ -import io +import logging -from test.util import ClangTest +from test.util import ClangTest, main from ctypeslib.codegen import clangparser @@ -34,4 +34,9 @@ def test_parse_string(self): self.assertTrue(self.parser.is_registered('struct_example_detail')) self.assertTrue(self.parser.is_registered('struct_example')) self.assertFalse(self.parser.is_registered('struct_whatever')) - return \ No newline at end of file + return + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + main(verbosity=2) diff --git a/test/test_cross_arch.py b/test/test_cross_arch.py index 112e89d..6b36f62 100644 --- a/test/test_cross_arch.py +++ b/test/test_cross_arch.py @@ -3,7 +3,7 @@ import 
sys import unittest -from test.util import ClangTest +from test.util import ClangTest, main class CrossArchSimplerCode(ClangTest): @@ -26,7 +26,5 @@ def test_same_arch_pointer(self): # print(self.text_output) - - if __name__ == "__main__": - unittest.main() + main() diff --git a/test/test_enum.py b/test/test_enum.py index 522d028..9dca2e9 100644 --- a/test/test_enum.py +++ b/test/test_enum.py @@ -1,7 +1,7 @@ import unittest import ctypes -from test.util import ClangTest +from test.util import ClangTest, main class EnumTest(ClangTest): @@ -307,4 +307,4 @@ def test_enum_struct_ordering(self): if __name__ == "__main__": # logging.basicConfig(stream=sys.stderr, level=logging.DEBUG) # logging.getLogger('codegen').setLevel(logging.INFO) - unittest.main() + main() diff --git a/test/test_example_script.py b/test/test_example_script.py index 7d6c689..43aaabf 100644 --- a/test/test_example_script.py +++ b/test/test_example_script.py @@ -11,6 +11,7 @@ from io import StringIO from ctypeslib.codegen import clangparser, codegenerator +from test.util import main class ExampleTest(unittest.TestCase): def setUp(self): @@ -90,5 +91,6 @@ def test_example(self): #print("\tone.details[0].first == %d" % one.details[0].first) return + if __name__ == "__main__": - unittest.main() \ No newline at end of file + main() diff --git a/test/test_fast_clang.py b/test/test_fast_clang.py index 90aaac7..a40fbae 100644 --- a/test/test_fast_clang.py +++ b/test/test_fast_clang.py @@ -1,7 +1,7 @@ import unittest import ctypes -from test.util import ClangTest +from test.util import ClangTest, main class CompareSizes(ClangTest): @@ -75,4 +75,4 @@ def test_record_complex(self): import sys if __name__ == "__main__": #logging.basicConfig( stream=sys.stderr, level=logging.DEBUG ) - unittest.main() + main() diff --git a/test/test_function.py b/test/test_function.py index d75023f..1c221b9 100644 --- a/test/test_function.py +++ b/test/test_function.py @@ -3,7 +3,7 @@ from ctypeslib.codegen.util import get_cursor 
from ctypeslib.codegen.util import get_tu -from test.util import ClangTest +from test.util import ClangTest, main """Test if functions are correctly generated. """ @@ -77,4 +77,4 @@ def test_two_stub_functions(self): if __name__ == "__main__": import logging logging.basicConfig(level=logging.DEBUG) - unittest.main() + main() diff --git a/test/test_macro.py b/test/test_macro.py index 838bb89..d98d6b4 100644 --- a/test/test_macro.py +++ b/test/test_macro.py @@ -1,7 +1,7 @@ import unittest import datetime -from test.util import ClangTest +from test.util import ClangTest, main '''Test if macro are correctly generated. ''' @@ -480,6 +480,7 @@ def test_function(self): #define fn_name(a,b) real_name(a,b) fn_type fn_name(int a, int b); ''') + # print(self.text_output) self.assertIn("real_name", self.namespace) def test_simple_macro_function(self): @@ -508,7 +509,7 @@ def test_example(self): self.assertEqual(self.namespace.tab1, [1, 2]) self.assertEqual(self.namespace.DEBUG, True) self.assertEqual(self.namespace.PROD, 1) - # we don't gen macro functions + # we DO NOT gen macro functions by default self.assertNotIn('MACRO_EXAMPLE', self.namespace) # self.assertEqual(self.namespace.MY, 123456) # that is not a thing that compiles @@ -524,7 +525,7 @@ def test_macro_to_variable(self): #define NO_SPAM NO SPACE SPAM #define NO_SPAM_FOO NO SPACE SPAM SPACE FOO ''') - # print(self.text_output) + # print(self.text_output) self.assertIn('SPAM', self.namespace) self.assertEqual('spam', self.namespace.SPAM) self.assertIn('NO', self.namespace) @@ -603,7 +604,7 @@ def test_internal_defines_recursive(self): # replace leading 0 in day by a whitespace. 
this_date = datetime.datetime.now().strftime("%b %d %Y").replace(" 0", " ") self.assertIn("# DATE = __DATE__", self.text_output) - self.assertIn("# DATE2 = __DATE__", self.text_output) + self.assertIn("# DATE2 = DATE", self.text_output) @unittest.skip def test_internal_defines_recursive_with_operation(self): @@ -645,8 +646,8 @@ def test_pack_attribute(self): }; ''') # print(self.text_output) - self.assertIn("# PACK = __attribute__", self.text_output) - self.assertIn("# PACKTO = __attribute__", self.text_output) + self.assertIn("# PACK = ", self.text_output) + self.assertIn("# PACKTO = ", self.text_output) self.assertIn("struct_foo", self.namespace) def test_enum_macro(self): @@ -668,5 +669,5 @@ def test_enum_macro(self): if __name__ == "__main__": import logging - logging.basicConfig(level=logging.DEBUG) - unittest.main() + # logging.basicConfig(level=logging.DEBUG) + main() diff --git a/test/test_macro_advanced.py b/test/test_macro_advanced.py new file mode 100644 index 0000000..0d4e432 --- /dev/null +++ b/test/test_macro_advanced.py @@ -0,0 +1,64 @@ +import unittest +import datetime +import textwrap + +from test.util import ClangTest, main + +"""Test if macro are correctly generated. +""" + +import logging # noqa + +# logging.basicConfig(level=logging.DEBUG) + + +class Macro(ClangTest): + # @unittest.skip('') + + def setUp(self): + # We need to generate macro (including function-like macro) + # This used to take a long time to process but some performance + # improvements have been implemented and I am not sure if it's + # still the case for common workloads. (See: codegen.cache). + self.full_parsing_options = True + self.advanced_macro = True + + def test_bitwise(self): + self.convert(textwrap.dedent(""" + #define FOO(foo) (foo & 0x0FFFF) + """)) + # print(self.text_output) + self.assertEqual(self.namespace.FOO(0x1ABCD), 0x0ABCD) + + def test_va_args(self): + self.convert(textwrap.dedent(""" + #define FOO(...) 
("foo", __VA_ARGS__, "bar") + #define BAR(a, b, c) FOO(c, b, a) + """)) + # print(self.text_output) + self.assertEqual(self.namespace.BAR(1, 2, 3), ("foo", 3, 2, 1, "bar")) + + def test_stdint(self): + self.convert(textwrap.dedent(""" + #include + """)) + print(self.text_output) + self.assertEqual(self.namespace.INT8_MIN, -128) + self.assertEqual(self.namespace.INT16_MIN, -32767 - 1) + self.assertEqual(self.namespace.INT32_MIN, -2147483647 - 1) + self.assertEqual(self.namespace.INT64_MIN, -9223372036854775807 - 1) + + self.assertEqual(self.namespace.INT8_MAX, 127) + self.assertEqual(self.namespace.INT16_MAX, 32767) + self.assertEqual(self.namespace.INT32_MAX, 2147483647) + self.assertEqual(self.namespace.INT64_MAX, 9223372036854775807) + + self.assertEqual(self.namespace.UINT8_MAX, 255) + self.assertEqual(self.namespace.UINT16_MAX, 65535) + self.assertEqual(self.namespace.UINT32_MAX, 4294967295) + self.assertEqual(self.namespace.UINT64_MAX, 18446744073709551615) + + +if __name__ == "__main__": + # logging.basicConfig(level=logging.INFO) + main(verbosity=2) diff --git a/test/test_pointer.py b/test/test_pointer.py index c5b4302..87e4a57 100644 --- a/test/test_pointer.py +++ b/test/test_pointer.py @@ -1,7 +1,7 @@ import unittest import ctypes -from test.util import ClangTest +from test.util import ClangTest, main '''Test if pointers are correctly generated in structures for different target archictecture. 
@@ -46,4 +46,4 @@ def test_same_arch_pointer(self): if __name__ == "__main__": - unittest.main() + main() diff --git a/test/test_record.py b/test/test_record.py index 608ee28..d1dd177 100644 --- a/test/test_record.py +++ b/test/test_record.py @@ -1,7 +1,7 @@ import ctypes import unittest -from test.util import ClangTest +from test.util import ClangTest, main import logging @@ -245,4 +245,4 @@ def test_record_ordering(self): if __name__ == "__main__": # logging.basicConfig(stream=sys.stderr, level=logging.DEBUG) # logging.getLogger('codegen').setLevel(logging.INFO) - unittest.main() + main() diff --git a/test/test_strings.py b/test/test_strings.py index bffedce..d582ba5 100644 --- a/test/test_strings.py +++ b/test/test_strings.py @@ -1,7 +1,7 @@ import logging import unittest -from test.util import ClangTest +from test.util import ClangTest, main # logging.basicConfig(level=logging.DEBUG) @@ -141,4 +141,4 @@ def test_unicode_cpp11(self): if __name__ == "__main__": logging.basicConfig(level=logging.DEBUG) - unittest.main() + main() diff --git a/test/test_types_sizes.py b/test/test_types_sizes.py index b64fe26..6b11131 100644 --- a/test/test_types_sizes.py +++ b/test/test_types_sizes.py @@ -1,6 +1,6 @@ import unittest -from test.util import ClangTest +from test.util import ClangTest, main class BasicTypes(ClangTest): @@ -110,4 +110,4 @@ def test_typedef(self): if __name__ == "__main__": - unittest.main() + main() diff --git a/test/test_types_values.py b/test/test_types_values.py index 9beede2..f5b98b8 100644 --- a/test/test_types_values.py +++ b/test/test_types_values.py @@ -4,7 +4,7 @@ import logging import unittest -from test.util import ClangTest +from test.util import ClangTest, main class ConstantsTest(ClangTest): @@ -372,4 +372,4 @@ def test_docstring(self): if __name__ == "__main__": logging.basicConfig(level=logging.DEBUG) - unittest.main() + main() diff --git a/test/util.py b/test/util.py index 42fd18e..745a0c1 100644 --- a/test/util.py +++ b/test/util.py @@ 
-2,13 +2,16 @@ # This file provides common utility functions for the test suite. # +import argparse import ctypes import os +import sys from io import StringIO from ctypes import RTLD_GLOBAL -from clang.cindex import Cursor -from clang.cindex import TranslationUnit +from ctypeslib.codegen.cindex import Config +from ctypeslib.codegen.cindex import Cursor +from ctypeslib.codegen.cindex import TranslationUnit import unittest from ctypeslib.codegen import clangparser, codegenerator from ctypeslib.codegen import util as codegen_util @@ -27,6 +30,7 @@ class ClangTest(unittest.TestCase): namespace = None text_output = None full_parsing_options = False + advanced_macro = False def _gen(self, ofi, fname, flags=None, dlls=None): """Take a file input and generate the code. @@ -36,7 +40,7 @@ def _gen(self, ofi, fname, flags=None, dlls=None): # leave the new parser accessible for tests self.parser = clangparser.Clang_Parser(flags) if self.full_parsing_options: - self.parser.activate_macros_parsing() + self.parser.activate_macros_parsing(self.advanced_macro) self.parser.activate_comment_parsing() with open(fname): pass @@ -57,7 +61,9 @@ def gen(self, fname, flags=None, dlls=[], debug=False): ofi = StringIO() gen = self._gen(ofi, fname, flags=flags, dlls=dlls) # load code - namespace = {} + namespace = { + "__file__": os.path.join(os.path.dirname(__file__), "data", "test_util.py"), + } # DEBUG # print ofi.getvalue() # DEBUG @@ -158,5 +164,18 @@ def assertOffsets(self, name): return +def main(*args, **kwds): + parser = argparse.ArgumentParser() + parser.add_argument('--libclang-library', default=None) + parser.add_argument('--libclang-include-dir', default=None) + ns, unittest_args = parser.parse_known_args() + if ns.libclang_library: + Config.set_library_file(ns.libclang_library) + if ns.libclang_include_dir: + Config.set_include_dir(ns.libclang_include_dir) + sys.argv[1:] = unittest_args + return unittest.main(*args, **kwds) + + __all__ = [ ]