From 261f0e4ca6b17f284247cb361de56f12bcd6f068 Mon Sep 17 00:00:00 2001
From: heck <heck@pep.foundation>
Date: Sun, 20 Dec 2020 00:29:12 +0100
Subject: [PATCH] rename losing diff sucks, but...

---
 gen/extract.py | 143 ----------------
 gen/gen_cdl.py | 430 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 430 insertions(+), 143 deletions(-)
 delete mode 100755 gen/extract.py
 create mode 100755 gen/gen_cdl.py

diff --git a/gen/extract.py b/gen/extract.py
deleted file mode 100755
index 202b235..0000000
--- a/gen/extract.py
+++ /dev/null
@@ -1,143 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-import os
-import json
-
-from ast_parser import ASTParser
-from simple_ast import SimpleAST
-
-
-def join_dir_and_filenames(dirname, filenames):
-    paths = []
-    for basename in filenames:
-        path = dirname + basename
-        paths.append(path)
-    return paths
-
-
-def read_file(path):
-    with open(path) as f:
-        file_content = f.read()
-    return file_content
-
-
-# out-dir is in-dir if not specified
-def create_header(path, out_dir=None):
-    header = {"path": "",
-              "dir": "",
-              "filename": "",
-              "out_dir": "",
-              "sourcecode": ""}
-
-    header["path"] = path
-    header["dir"] = os.path.dirname(path)
-    header["filename"] = os.path.basename(path)
-
-    header["out_dir"] = header["dir"]
-    if out_dir:
-        header["out_dir"] = out_dir
-
-    header["sourcecode"] = read_file(path)
-    return header
-
-
-def write_json(content, outpath):
-    # create path if not existing
-    out_dir = os.path.dirname(outpath)
-    if not os.path.isdir(out_dir):
-        os.makedirs(out_dir)
-    # write
-    with open(outpath, "w+") as f:
-        json.dump(content, f, indent=4)
-
-
-# generates simple-ast for each header specified in spec out dir.
-def main_old():
-    parser = ASTParser("/opt/local/libexec/llvm-9.0/lib/libclang.dylib")
-
-    in_dir = r"/Users/heck/local-default/include/pEp/"
-    filenames = ["pEpEngine.h",
-                 "keymanagement.h",
-                 "message_api.h",
-                 "message.h",
-                 "sync_api.h",
-                 "key_reset.h",
-                 "Identity.h",
-                 "Rating.h"]
-
-    out_dir = "data/output"
-
-    paths = join_dir_and_filenames(in_dir, filenames)
-
-    headers = []
-    for path in paths:
-        headers.append(create_header(path, out_dir))
-
-    for header in headers:
-        print("processing path: " + header["path"] + "...")
-        header["ast"] = parser.parse(header["path"], header["sourcecode"])
-        write_json(header["ast"], header["out_dir"] + "/" + header["filename"] + ".ast.json")
-
-        simpleAst = SimpleAST()
-        header["simple_ast"] = simpleAst.create_simple_ast(header["ast"])
-        write_json(header["simple_ast"], header["out_dir"] + "/" + header["filename"] + ".simple_ast.json")
-
-
-def recursive_query(data, filter, transform=lambda x: x):
-    resultset = []
-
-    # decorator just handling exceptions
-    def filter_decorator(data):
-        try:
-            return filter(data)
-        except KeyError:
-            pass
-
-    # filter current data
-    if filter_decorator(data):
-        # transform result
-        xformed = transform(data)
-        if xformed:
-            resultset.append(xformed)
-
-    # recurse
-    if "children" in data:
-        for item in data["children"]:
-            childres = recursive_query(item, filter, transform)
-            if childres:
-                resultset += childres
-
-    return resultset
-
-
-def main_new():
-    parser = ASTParser("/opt/local/libexec/llvm-9.0/lib/libclang.dylib")
-
-    # header = create_header("/Users/heck/local-default/include/pEp/pEpEngine.h", out_dir="./")
-    header = create_header("data/input/test_data/main_include.h")
-
-    header["ast"] = parser.parse(header["path"], follow_includes=True)
-    write_json(header["ast"], header["out_dir"] + "/" + header["filename"] + ".ast.json")
-
-    # query
-    def filter_xzy(item):
-        if (item["is_definition"] == False
-            and item["kind"] == "CursorKind.STRUCT_DECL"
-        ):
-            return True
-
-    def xform(item):
-        return item
-
-    matches = recursive_query(header["ast"], filter_xzy, xform)
-    # matches = list(set(matches))
-    write_json(matches, header["out_dir"] + "/" + header["filename"] + ".matches.json")
-
-
-def main():
-    main_old()
-    # main_new()
-
-
-if __name__ == "__main__":
-    main()
diff --git a/gen/gen_cdl.py b/gen/gen_cdl.py
new file mode 100755
index 0000000..96084c4
--- /dev/null
+++ b/gen/gen_cdl.py
@@ -0,0 +1,430 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+import os
+import json
+
+from ast_parser import ASTParser
+from simple_ast import SimpleAST
+
+
+def join_dir_and_filenames(dirname, filenames):
+    # Return one path per entry of filenames, each placed inside dirname.
+    # os.path.join inserts the separator only when dirname lacks it, so a
+    # dirname without a trailing "/" no longer yields "dirfile.h".
+    paths = [os.path.join(dirname, basename) for basename in filenames]
+    return paths
+
+
+def read_file(path):
+    # Return the whole content of the text file at path as one string.
+    with open(path) as source_file:
+        return source_file.read()
+
+
+# Describe one header file; out_dir falls back to the header's directory.
+def create_header(path, out_dir=None):
+    # Returns a dict with the keys "path", "dir", "filename", "out_dir" and
+    # "sourcecode" (the file content, read eagerly via read_file).
+    src_dir = os.path.dirname(path)
+
+    # any falsy out_dir (None, "") selects the header's own directory
+    target_dir = out_dir if out_dir else src_dir
+
+    header = {
+        "path": path,
+        "dir": src_dir,
+        "filename": os.path.basename(path),
+        "out_dir": target_dir,
+        "sourcecode": read_file(path),
+    }
+
+    return header
+
+
+def write_json(content, outpath):
+    # Dump content as indented JSON; create the parent directory if any.
+    # (dirname() is "" for a bare filename and os.makedirs("") would raise.)
+    out_dir = os.path.dirname(outpath)
+    if out_dir:
+        os.makedirs(out_dir, exist_ok=True)
+    with open(outpath, "w+") as f:
+        json.dump(content, f, indent=4)
+
+
+# Legacy entry point: dump the full AST and the simple AST of each header.
+def main_old():
+    parser = ASTParser("/opt/local/libexec/llvm-9.0/lib/libclang.dylib")
+
+    in_dir = r"/Users/heck/local-default/include/pEp/"
+    out_dir = "data/output"
+    filenames = [
+        "pEpEngine.h",
+        "keymanagement.h",
+        "message_api.h",
+        "message.h",
+        "sync_api.h",
+        "key_reset.h",
+        "Identity.h",
+        "Rating.h",
+    ]
+
+    # Every header is read up front, before any parsing starts.
+    headers = [create_header(path, out_dir)
+               for path in join_dir_and_filenames(in_dir, filenames)]
+
+    for header in headers:
+        print("processing path: " + header["path"] + "...")
+        out_base = header["out_dir"] + "/" + header["filename"]
+
+        header["ast"] = parser.parse(header["path"], header["sourcecode"])
+        write_json(header["ast"], out_base + ".ast.json")
+
+        header["simple_ast"] = SimpleAST().create_simple_ast(header["ast"])
+        write_json(header["simple_ast"], out_base + ".simple_ast.json")
+
+
+# Depth-first search over a JSON-like structure (nested dicts and lists).
+def recursive_query(data, filter, transform=lambda x: x):
+    # data:      any value; only dicts and lists are descended into.
+    # filter:    predicate called on every dict. A KeyError raised by it is
+    #            treated as "no match", so filters may index keys freely.
+    # transform: maps a matching dict to the value that is collected; falsy
+    #            results are dropped.
+    # Returns a flat list: the match for data itself (if any) first,
+    # followed by the matches found in its children, in iteration order.
+    resultset = []
+
+    # Wrapper that turns a missing key inside the filter into "no match".
+    def filter_decorator(data):
+        try:
+            return filter(data)
+        except KeyError:
+            return None
+
+    if isinstance(data, dict):
+        # isinstance (not an exact type check) keeps filtering consistent
+        # with the recursion below, so dict subclasses are filtered as well.
+        if filter_decorator(data):
+            xformed = transform(data)
+            if xformed:
+                resultset.append(xformed)
+        children = data.values()
+    elif isinstance(data, list):
+        children = data
+    else:
+        # scalars (str, int, bool, None, ...) have nothing to descend into
+        children = ()
+
+    for item in children:
+        resultset += recursive_query(item, filter, transform)
+    return resultset
+
+
+def extract_forward_declarations(data):
+    # Collect every struct declaration node whose "is_definition" is False.
+    # Nodes lacking one of the inspected keys are skipped by recursive_query.
+    # Returns the matching AST nodes unchanged.
+    def is_forward_decl(node):
+        return (node["is_definition"] == False
+                and node["kind"] == "CursorKind.STRUCT_DECL")
+
+    # the default transform of recursive_query is the identity, which is
+    # all that is needed here
+    return recursive_query(data, is_forward_decl)
+
+
+def extract_functions(data, function_names):
+    # Collect the function declaration nodes whose "name" is contained in
+    # function_names (any container supporting `in`).
+    # Returns the matching AST nodes unchanged.
+    def is_wanted_function(node):
+        if node["kind"] != "CursorKind.FUNCTION_DECL":
+            return False
+        return node["name"] in function_names
+
+    matches = recursive_query(data, is_wanted_function)
+    return matches
+
+
+# Unique type names of all TYPE_REF nodes (the order is not defined).
+def collect_typerefs(data):
+    def is_typeref(node):
+        return node["kind"] == "CursorKind.TYPE_REF"
+
+    def type_name(node):
+        return node["type"]
+
+    # recursive_query drops falsy transform results, so a TYPE_REF with an
+    # empty "type" never shows up here.
+    typenames = recursive_query(data, is_typeref, type_name)
+
+    return list(set(typenames))
+
+
+def extract_vars(data, var_names):
+    # Collect the variable declaration nodes whose "name" is contained in
+    # var_names (any container supporting `in`).
+    # Returns the matching AST nodes unchanged.
+    def is_wanted_var(node):
+        if node["kind"] != "CursorKind.VAR_DECL":
+            return False
+        return node["name"] in var_names
+
+    matches = recursive_query(data, is_wanted_var)
+    return matches
+
+
+def extract_types(data):
+    # Names of every typedef, enum and struct declaration found in data.
+    # Returns a list of the nodes' "name" values in traversal order;
+    # duplicates are kept, and nodes with an empty name are dropped because
+    # recursive_query discards falsy transform results.
+    type_kinds = (
+        "CursorKind.TYPEDEF_DECL",
+        "CursorKind.ENUM_DECL",
+        "CursorKind.STRUCT_DECL",
+    )
+
+    def is_type_decl(node):
+        return node["kind"] in type_kinds
+
+    # Only the name is reported; "utypekind" / "type" are not included.
+    def name_of(node):
+        return node["name"]
+
+    return recursive_query(data, is_type_decl, name_of)
+
+
+def remove_dup_dicts(arr_of_dicts):
+    # Dedupe via a set of item-tuples: values must be hashable, order is lost.
+    return [dict(items) for items in {tuple(d.items()) for d in arr_of_dicts}]
+
+
+def resolve_typerefs(ast, typeref_names):
+    # Look up every name in typeref_names with resolve_typeref().
+    # Returns (types, resolved, notfound): the declarations found, the names
+    # they were found for (same order), and the names that gave no result.
+    types, typeref_resolved, typeref_notfound = [], [], []
+
+    for typeref_name in typeref_names:
+        decl = resolve_typeref(ast, typeref_name)
+        if decl:
+            types.append(decl)
+            typeref_resolved.append(typeref_name)
+        else:
+            typeref_notfound.append(typeref_name)
+    return (types, typeref_resolved, typeref_notfound)
+
+
+def resolve_typeref(ast, typeref_name):
+    # Resolve a type name to its declaration node, trying the declaration
+    # kinds in a fixed order: enum, then struct, then typedef.
+    # The first truthy lookup result wins; when every lookup comes back
+    # empty, the (falsy) result of the typedef lookup is returned.
+    return (
+        extract_enum_decl(ast, typeref_name)
+        or extract_struct_decl(ast, typeref_name)
+        or extract_typedef_decl(ast, typeref_name)
+    )
+
+
+def extract_enum_decl(ast, name):
+    # Find the enum declaration node whose "type" equals name.
+    # Returns the node, or None when there is no such enum.
+    # Raises AssertionError when more than one node matches.
+    def is_named_enum(data):
+        return (data["kind"] == "CursorKind.ENUM_DECL"
+                and data["type"] == name)
+
+    res = recursive_query(ast, is_named_enum)
+    if len(res) > 1:
+        # Raised explicitly: `assert (False, "msg")` tests a non-empty tuple,
+        # which is always true, so that form can never fire.
+        raise AssertionError("duplicate definition")
+    if not res:
+        return None
+    return res[0]
+
+
+def extract_struct_decl(ast, name):
+    # Find the struct declaration node whose "type" equals name.
+    # Returns the node, or None when there is no such struct.
+    # Raises AssertionError when more than one node matches.
+    def is_named_struct(data):
+        return (data["kind"] == "CursorKind.STRUCT_DECL"
+                and data["type"] == name)
+
+    res = recursive_query(ast, is_named_struct)
+    if len(res) > 1:
+        # Raised explicitly: `assert (False, "msg")` tests a non-empty tuple,
+        # which is always true, so that form can never fire.
+        raise AssertionError("duplicate definition")
+    if not res:
+        return None
+    return res[0]
+
+
+def extract_typedef_decl(ast, name):
+    # Resolve the typedef whose "type" equals name to what it stands for.
+    # Returns None when there is no such typedef. A typedef of a typedef is
+    # followed recursively, an "Elaborated" underlying type is looked up as
+    # enum first and struct second, anything else yields the typedef node.
+    def is_named_typedef(data):
+        return (data["kind"] == "CursorKind.TYPEDEF_DECL"
+                and data["type"] == name)
+
+    res = recursive_query(ast, is_named_typedef)
+    if not res:
+        return None
+
+    typedef = res[-1]  # with several matches the last one found is used
+    if typedef["utypekind"] == "Typedef":
+        return extract_typedef_decl(ast, typedef["utype"])
+    if typedef["utypekind"] == "Elaborated":
+        return extract_enum_decl(ast, typedef["utype"]) or extract_struct_decl(ast, typedef["utype"])
+    return typedef
+
+
+# def remove_already_resolved(resolved, typerefs):
+#     unresolved = []
+#     for typeref in typerefs:
+#         def filter(data):
+#             if (data["kind"] == "CursorKind.STRUCT_DECL"
+#                 or data["kind"] == "CursorKind.ENUM_DECL" #maybe typedef
+#                 and data["type"] == typeref["type"]):
+#                 return True
+#
+#         contained = recursive_query(resolved, filter)
+#         if not contained:
+#             unresolved.append(typeref)
+#     return unresolved
+
+def find_dup_types(data):
+    # List the "type" of every struct/enum declaration in data and report
+    # which of them occur more than once.
+    # Returns (all_types, dups): all_types in traversal order with
+    # repetitions, dups with each repeated type once (first-seen order, 3.7+).
+    from collections import Counter  # local: keeps the block self-contained
+
+    def is_struct_or_enum(item):
+        return item["kind"] in ("CursorKind.STRUCT_DECL", "CursorKind.ENUM_DECL")
+
+    all_types = recursive_query(data, is_struct_or_enum, lambda item: item["type"])
+
+    # One counting pass instead of list.count() per element (quadratic).
+    occurrences = Counter(all_types)
+    dups = [name for name, count in occurrences.items() if count > 1]
+    return (all_types, dups)
+
+
+def main_new():
+    # Parse the test header and write its AST, the CDL and the views as JSON.
+    ast_parser = ASTParser("/opt/local/libexec/llvm-9.0/lib/libclang.dylib")
+
+    function_names = {""}
+    # function_names.add("func_void_void")
+    # function_names.add("func_void_int")
+    # function_names.add("func_int_void")
+    # function_names.add("func_int_int")
+    # function_names.add("func__PS_V")
+    # function_names.add("func__E_V")
+    # function_names.add("func_TP_V")
+    # function_names.add("func_V_TP")
+    # function_names.add("func_TP_TP")
+    # function_names.add("func_APS_V")
+    # function_names.add("func_V_APS")
+    # function_names.add("func_APS_APS")
+    # function_names.add("func_TPS_V")
+    # function_names.add("func_V_TPS")
+    # function_names.add("func_V_TPS")
+    # function_names.add("func_TPS_TPS")
+    # function_names.add("func_TPS_TPS")
+
+    var_names = {""}
+    # var_names.add("var_P")
+    # var_names.add("var__E")
+    # var_names.add("var_E")
+    # var_names.add("var_AE")
+    # var_names.add("var__PS")
+    # var_names.add("var_PS")
+    # var_names.add("var_APS")
+    # var_names.add("var__CS")
+    # var_names.add("var_CS")
+    # var_names.add("var_ACS")
+    # var_names.add("var__CCS")
+    # var_names.add("var_CCS")
+    # var_names.add("var_ACCS")
+    var_names.add("var__HS")
+    # var_names.add("var__NCS")
+    # var_names.add("var_HS")
+    # var_names.add("var__NSP")
+    # var_names.add("var_AHS")
+    # var_names.add("var__HHS")
+    # var_names.add("var__NHS")
+    # var_names.add("var__NNPS")
+    # var_names.add("var_HHS")
+    # var_names.add("var__NHS1")
+    # var_names.add("var__NNCS")
+    # var_names.add("var__NENHS")
+    # var_names.add("var_ASHS")
+    # var_names.add("var__HS1")
+    # var_names.add("var_NEHS1")
+
+    # header = create_header("/Users/heck/local-default/include/pEp/pEpEngine.h", out_dir="./")
+    header = create_header("data/input/test_data/test_lib.h")
+
+    header["ast"] = ast_parser.parse(header["path"], follow_includes=True)
+    write_json(header["ast"], header["out_dir"] + "/" + header["filename"] + ".ast.json")
+
+    # VIEWS
+    views = {}
+    views["forward_declarations"] = extract_forward_declarations(header["ast"])
+    views["types"] = extract_types(header["ast"])
+    views["types_count"] = len(views["types"])
+
+    # CDL
+    # AST normalizing
+    # ("structs" and "enums" are placeholders: nothing below assigns them)
+    cdl = {"functions": "",
+           "vars": "",
+           "structs": "",
+           "enums": ""}
+
+    # stage 1: extract functions and vars
+    cdl["functions"] = extract_functions(header["ast"], function_names)
+    cdl["vars"] = extract_vars(header["ast"], var_names)
+
+    # stage 2: collect type refs
+    cdl["types_resolved"] = []
+    cdl["typerefs_unresolved"] = []
+    cdl["typerefs_resolved"] = []
+    cdl["typerefs_notfound"] = []
+    # repeat until no new type name is found: types resolved in one pass are added to cdl and scanned in the next
+    while True:
+        cdl["typerefs_unresolved"] = collect_typerefs(cdl)  # only list of typenames
+        cdl["typerefs_unresolved"] = list(set(cdl["typerefs_unresolved"]) - (set(cdl["typerefs_resolved"]).union(set(cdl["typerefs_notfound"]))))
+        if (len(cdl["typerefs_unresolved"]) <= 0):
+            break
+
+        (types_resolved, typerefs_resolved, notfound) = resolve_typerefs(header["ast"], cdl["typerefs_unresolved"])
+        cdl["types_resolved"] += types_resolved
+        cdl["typerefs_resolved"] += typerefs_resolved
+        cdl["typerefs_notfound"] += notfound
+
+    # duplicates are searched in the CDL only, not in the full AST
+    (views["all_types"], views["dup_types"]) = find_dup_types(cdl)
+
+    header["cdl"] = cdl
+    write_json(header["cdl"], header["out_dir"] + "/" + header["filename"] + ".cdl.json")
+
+    header["views"] = views
+    write_json(header["views"], header["out_dir"] + "/" + header["filename"] + ".views.json")
+
+
+def main():
+    # main_old() is the alternative entry point; swap the calls to run it
+    main_new()
+
+
+if __name__ == "__main__":
+    main()