From 4d307a886ed1efd9e26d768a4bb3491d4fde9443 Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Sun, 4 Dec 2022 10:16:52 -0500 Subject: [PATCH 1/3] Add option to remove preprocessor macro detection --- CppHeaderParser/CppHeaderParser.py | 42 ++++++++++++++++++------------ 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/CppHeaderParser/CppHeaderParser.py b/CppHeaderParser/CppHeaderParser.py index b8f744b..5897f6b 100644 --- a/CppHeaderParser/CppHeaderParser.py +++ b/CppHeaderParser/CppHeaderParser.py @@ -2702,7 +2702,14 @@ def show(self): for className in list(self.classes.keys()): self.classes[className].show() - def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): + def __init__( + self, + headerFileName, + argType="file", + encoding=None, + preprocessed=False, + **kwargs + ): """Create the parsed C++ header file parse tree headerFileName - Name of the file to parse OR actual file contents (depends on argType) @@ -2802,21 +2809,24 @@ def __init__(self, headerFileName, argType="file", encoding=None, **kwargs): "[ ]+", " ", supportedAccessSpecifier[i] ).strip() - # Change multi line #defines and expressions to single lines maintaining line nubmers - # Based from http://stackoverflow.com/questions/2424458/regular-expression-to-match-cs-multiline-preprocessor-statements - matches = re.findall(r"(?m)^(?:.*\\\r?\n)+.*$", headerFileStr) - is_define = re.compile(r"[ \t\v]*#[Dd][Ee][Ff][Ii][Nn][Ee]") - for m in matches: - # Keep the newlines so that linecount doesnt break - num_newlines = len([a for a in m if a == "\n"]) - if is_define.match(m): - new_m = m.replace("\n", "\\n") - else: - # Just expression taking up multiple lines, make it take 1 line for easier parsing - new_m = m.replace("\\\n", " ") - if num_newlines > 0: - new_m += "\n" * (num_newlines) - headerFileStr = headerFileStr.replace(m, new_m) + if not preprocessed: + # Change multi line #defines and expressions to single lines maintaining line nubmers + # Based from http://stackoverflow.com/questions/2424458/regular-expression-to-match-cs-multiline-preprocessor-statements + matches = re.findall(r"(?m)^(?:.*\\\r?\n)+.*$", headerFileStr) + is_define = re.compile(r"[ \t\v]*#[Dd][Ee][Ff][Ii][Nn][Ee]") + for m in matches: + # Keep the newlines so that linecount doesnt break + num_newlines = len([a for a in m if a == "\n"]) + if is_define.match(m): + new_m = m.replace( + "\n", "\\n" + ) + else: + # Just expression taking up multiple lines, make it take 1 line for easier parsing + new_m = m.replace("\\\n", " ") + if num_newlines > 0: + new_m += "\n" * (num_newlines) + headerFileStr = headerFileStr.replace(m, new_m) # Filter out Extern "C" statements. These are order dependent matches = re.findall( From 8d4abd7a949b7289170fd685c704b847f475ac82 Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Sun, 4 Dec 2022 21:56:07 -0500 Subject: [PATCH 2/3] Fix extern 'C' parsing - Linkage wasn't being recorded previously - Namespaces were being destroyed --- CppHeaderParser/CppHeaderParser.py | 48 +++++++++++++++++++----------- test/test_CppHeaderParser.py | 46 ++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 18 deletions(-) diff --git a/CppHeaderParser/CppHeaderParser.py b/CppHeaderParser/CppHeaderParser.py index 5897f6b..05588e9 100644 --- a/CppHeaderParser/CppHeaderParser.py +++ b/CppHeaderParser/CppHeaderParser.py @@ -139,6 +139,9 @@ def trace_print(*args): #: Symbols to ignore, usually special macros ignoreSymbols = ["Q_OBJECT"] +_BRACE_REASON_OTHER = 0 +_BRACE_REASON_NS = 1 +_BRACE_REASON_EXTERN = 2 # Track what was added in what order and at what depth parseHistory = [] @@ -1506,6 +1509,11 @@ def cur_namespace(self, add_double_colon=False): i += 1 return rtn + def cur_linkage(self): + if len(self.linkage_stack): + return self.linkage_stack[-1] + return "" + def guess_ctypes_type(self, string): pointers = string.count("*") string = string.replace("*", "") @@ -2372,6 +2380,7 @@ def _evaluate_method_stack(self): self._get_location(self.nameStack), ) newMethod["parent"] = None + newMethod["linkage"] = self.cur_linkage() self.functions.append(newMethod) # Reset template once it has been used @@ -2524,6 +2533,7 @@ def _evaluate_property_stack(self, clearStack=True, addToVar=None): self._get_location(self.nameStack), ) newVar["namespace"] = self.current_namespace() + newVar["linkage"] = self.cur_linkage() if self.curClass: klass = self.classes[self.curClass] klass["properties"][self.curAccessSpecifier].append(newVar) @@ -2542,6 +2552,7 @@ def _evaluate_property_stack(self, clearStack=True, addToVar=None): self._get_location(self.nameStack), ) newVar["namespace"] = self.cur_namespace(False) + newVar["linkage"] = self.cur_linkage() if addToVar: newVar.update(addToVar) self.variables.append(newVar) @@ -2600,6 +2611,7 @@ def _evaluate_class_stack(self): ) self.curTemplate = None newClass["declaration_method"] = self.nameStack[0] + newClass["linkage"] = self.cur_linkage() self.classes_order.append(newClass) # good idea to save ordering self.stack = [] # fixes if class declared with ';' in closing brace self.stmtTokens = [] @@ -2782,6 +2794,7 @@ def __init__( self.curAccessSpecifier = "private" # private is default self.curTemplate = None self.accessSpecifierStack = [] + self.linkage_stack = [] debug_print( "curAccessSpecifier changed/defaulted to %s", self.curAccessSpecifier ) @@ -2828,16 +2841,6 @@ def __init__( new_m += "\n" * (num_newlines) headerFileStr = headerFileStr.replace(m, new_m) - # Filter out Extern "C" statements. These are order dependent - matches = re.findall( - re.compile(r'extern[\t ]+"[Cc]"[\t \n\r]*{', re.DOTALL), headerFileStr - ) - for m in matches: - # Keep the newlines so that linecount doesnt break - num_newlines = len([a for a in m if a == "\n"]) - headerFileStr = headerFileStr.replace(m, "\n" * num_newlines) - headerFileStr = re.sub(r'extern[ ]+"[Cc]"[ ]*', "", headerFileStr) - # Filter out any ignore symbols that end with "()" to account for #define magic functions for ignore in ignoreSymbols: if not ignore.endswith("()"): @@ -2876,6 +2879,8 @@ def __init__( ) self.braceDepth = 0 + self.braceReason = [] + self.lastBraceReason = _BRACE_REASON_OTHER lex = Lexer(self.headerFileName) lex.input(headerFileStr) @@ -2948,23 +2953,20 @@ def __init__( continue if parenDepth == 0 and tok.type == "{": + self.lastBraceReason = _BRACE_REASON_OTHER if len(self.nameStack) >= 2 and is_namespace( self.nameStack ): # namespace {} with no name used in boost, this sets default? - if ( - self.nameStack[1] - == "__IGNORED_NAMESPACE__CppHeaderParser__" - ): # Used in filtering extern "C" - self.nameStack[1] = "" self.nameSpaces.append("".join(self.nameStack[1:])) ns = self.cur_namespace() self.stack = [] self.stmtTokens = [] if ns not in self.namespaces: self.namespaces.append(ns) + self.lastBraceReason = _BRACE_REASON_NS # Detect special condition of macro magic before class declaration so we # can filter it out - if "class" in self.nameStack and self.nameStack[0] != "class": + elif "class" in self.nameStack and self.nameStack[0] != "class": classLocationNS = self.nameStack.index("class") classLocationS = self.stack.index("class") if ( @@ -2997,14 +2999,20 @@ def __init__( self.stmtTokens = [] if not self.braceHandled: self.braceDepth += 1 + self.braceReason.append(self.lastBraceReason) elif parenDepth == 0 and tok.type == "}": if self.braceDepth == 0: continue - if self.braceDepth == len(self.nameSpaces): - tmp = self.nameSpaces.pop() + reason = self.braceReason.pop() + if reason == _BRACE_REASON_NS: + self.nameSpaces.pop() self.stack = [] # clear stack when namespace ends? self.stmtTokens = [] + elif reason == _BRACE_REASON_EXTERN: + self.linkage_stack.pop() + self.stack = [] # clear stack when linkage ends? + self.stmtTokens = [] else: self._evaluate_stack() self.braceDepth -= 1 @@ -3368,8 +3376,10 @@ def _evaluate_stack(self, token=None): pass elif len(self.nameStack) == 2 and self.nameStack[0] == "extern": debug_print("trace extern") + self.linkage_stack.append(self.nameStack[1].strip('"')) self.stack = [] self.stmtTokens = [] + self.lastBraceReason = _BRACE_REASON_EXTERN elif ( len(self.nameStack) == 2 and self.nameStack[0] == "friend" ): # friend class declaration @@ -3677,12 +3687,14 @@ def _parse_enum(self): def _install_enum(self, newEnum, instancesData): if len(self.curClass): newEnum["namespace"] = self.cur_namespace(False) + newEnum["linkage"] = self.cur_linkage() klass = self.classes[self.curClass] klass["enums"][self.curAccessSpecifier].append(newEnum) if self.curAccessSpecifier == "public" and "name" in newEnum: klass._public_enums[newEnum["name"]] = newEnum else: newEnum["namespace"] = self.cur_namespace(True) + newEnum["linkage"] = self.cur_linkage() self.enums.append(newEnum) if "name" in newEnum and newEnum["name"]: self.global_enums[newEnum["name"]] = newEnum diff --git a/test/test_CppHeaderParser.py b/test/test_CppHeaderParser.py index e3472f5..c8aef1b 100644 --- a/test/test_CppHeaderParser.py +++ b/test/test_CppHeaderParser.py @@ -4183,5 +4183,51 @@ def test_fn3(self): self.assertEqual(fn["pure_virtual"], True) +class ExternCQuirk(unittest.TestCase): + # bug where extern "C" reset the namespace + def setUp(self): + self.cppHeader = CppHeaderParser.CppHeader( + """ +namespace cs { +extern "C" { + struct InCSAndExtern {}; + void FnInCSAndExtern(InCSAndExtern *n); +} + +class InCS {}; + +} + +void FnNotInCSOrExtern(); + +""", + "string", + ) + + def test_fn(self): + + # NotCS should be in namespace cs, extern C + c = self.cppHeader.classes["InCSAndExtern"] + self.assertEqual(c["namespace"], "cs") + self.assertEqual(c["linkage"], "C") + + # FnNotCS should be in namespace cs, extern C + fn = self.cppHeader.functions[0] + self.assertEqual(fn["name"], "FnInCSAndExtern") + self.assertEqual(fn["namespace"], "cs::") + self.assertEqual(fn["linkage"], "C") + + # InCS should be in namespace cs + c = self.cppHeader.classes["InCS"] + self.assertEqual(c["namespace"], "cs") + self.assertEqual(c["linkage"], "") + + # FnNotCS should not in namespace cs nor extern C + fn = self.cppHeader.functions[1] + self.assertEqual(fn["name"], "FnNotInCSOrExtern") + self.assertEqual(fn["namespace"], "") + self.assertEqual(fn["linkage"], "") + + if __name__ == "__main__": unittest.main() From da1a074314e753a970bc00e7eccc4a62a8c616d7 Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Sun, 4 Dec 2022 22:18:21 -0500 Subject: [PATCH 3/3] Remove SubTypedefs thing - Doesn't seem useful --- CppHeaderParser/CppHeaderParser.py | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/CppHeaderParser/CppHeaderParser.py b/CppHeaderParser/CppHeaderParser.py index 05588e9..4482287 100644 --- a/CppHeaderParser/CppHeaderParser.py +++ b/CppHeaderParser/CppHeaderParser.py @@ -1480,7 +1480,6 @@ class Resolver(object): C_KEYWORDS = "extern virtual static explicit inline friend constexpr".split() C_KEYWORDS = set(C_KEYWORDS) - SubTypedefs = {} # TODO deprecate? NAMESPACES = [] CLASSES = {} @@ -1878,21 +1877,6 @@ def finalize_vars(self): var["ctypes_type"] = "ctypes.c_void_p" var["unresolved"] = True - elif tag in self.SubTypedefs: # TODO remove SubTypedefs - if ( - "property_of_class" in var - or "property_of_struct" in var - ): - trace_print( - "class:", self.SubTypedefs[tag], "tag:", tag - ) - var["typedef"] = self.SubTypedefs[tag] # class name - var["ctypes_type"] = "ctypes.c_void_p" - else: - trace_print("WARN-this should almost never happen!") - trace_print(var) - var["unresolved"] = True - elif tag in self._template_typenames: var["typename"] = tag var["ctypes_type"] = "ctypes.c_void_p" @@ -2069,10 +2053,6 @@ def finalize(self): trace_print("meth returns class:", meth["returns"]) meth["returns_class"] = True - elif meth["returns"] in self.SubTypedefs: - meth["returns_class"] = True - meth["returns_nested"] = self.SubTypedefs[meth["returns"]] - elif meth["returns"] in cls._public_enums: enum = cls._public_enums[meth["returns"]] meth["returns_enum"] = enum.get("type") @@ -2481,7 +2461,6 @@ def _evaluate_property_stack(self, clearStack=True, addToVar=None): klass["typedefs"][self.curAccessSpecifier].append(name) if self.curAccessSpecifier == "public": klass._public_typedefs[name] = typedef["type"] - Resolver.SubTypedefs[name] = self.curClass else: assert 0 elif self.curClass: