#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Provenance: reconstructed from git patch 261f0e4ca6b17f284247cb361de56f12bcd6f068
# (author: heck, 2020-12-20, "rename losing diff sucks, but..."), which deletes
# gen/extract.py and creates this file, gen/gen_cdl.py, in its place.
import json
import os

from ast_parser import ASTParser
from simple_ast import SimpleAST


def join_dir_and_filenames(dirname, filenames):
    """Return one path per entry of *filenames*, each located in *dirname*.

    ``os.path.join`` is used so the result is correct whether or not
    *dirname* ends with a path separator.
    """
    return [os.path.join(dirname, basename) for basename in filenames]


def read_file(path):
    """Return the complete text content of the file at *path*."""
    with open(path) as f:
        return f.read()


def create_header(path, out_dir=None):
    """Build the header record describing the source file at *path*.

    The record is a dict with the keys ``path``, ``dir``, ``filename``,
    ``out_dir`` and ``sourcecode`` (the file content, read immediately).

    *out_dir* defaults to the directory of *path*. When *path* has no
    directory component either, ``"."`` is used so that callers joining
    ``out_dir`` and a file name never end up with a path in the file system
    root.
    """
    header_dir = os.path.dirname(path)
    return {
        "path": path,
        "dir": header_dir,
        "filename": os.path.basename(path),
        "out_dir": out_dir or header_dir or ".",
        "sourcecode": read_file(path),
    }


def write_json(content, outpath):
    """Serialize *content* as JSON (4-space indent) into the file *outpath*.

    Missing parent directories are created first. An *outpath* without a
    directory component is written to the current working directory.
    """
    out_dir = os.path.dirname(outpath)
    # dirname is "" for a bare file name; os.makedirs("") would raise.
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(outpath, "w") as f:
        json.dump(content, f, indent=4)


# generates simple-ast for each header specified in spec out dir.
def main_old(in_dir=r"/Users/heck/local-default/include/pEp/",
             filenames=None,
             out_dir="data/output",
             libclang_path="/opt/local/libexec/llvm-9.0/lib/libclang.dylib"):
    """Write ``.ast.json`` and ``.simple_ast.json`` for a list of headers.

    Legacy flow. Every file of *filenames* found in *in_dir* is parsed with
    ``ASTParser``, reduced with ``SimpleAST`` and both results are written
    into *out_dir*. All arguments default to the values that used to be
    hard-coded here.
    """
    if filenames is None:
        filenames = ["pEpEngine.h",
                     "keymanagement.h",
                     "message_api.h",
                     "message.h",
                     "sync_api.h",
                     "key_reset.h",
                     "Identity.h",
                     "Rating.h"]

    parser = ASTParser(libclang_path)

    paths = join_dir_and_filenames(in_dir, filenames)
    headers = [create_header(path, out_dir) for path in paths]

    for header in headers:
        print("processing path: " + header["path"] + "...")
        header["ast"] = parser.parse(header["path"], header["sourcecode"])
        write_json(header["ast"], _artifact_path(header, ".ast.json"))

        simple_ast = SimpleAST()
        header["simple_ast"] = simple_ast.create_simple_ast(header["ast"])
        write_json(header["simple_ast"], _artifact_path(header, ".simple_ast.json"))


def _artifact_path(header, suffix):
    """Return ``<out_dir>/<filename><suffix>`` for the header record *header*."""
    # An empty out_dir falls back to "." instead of producing "/<filename>".
    return os.path.join(header["out_dir"] or ".", header["filename"] + suffix)


def recursive_query(data, filter, transform=lambda x: x):
    """Collect all dicts matching *filter* from a JSON-like structure.

    *data* may be any value; dicts and lists are traversed recursively
    (dict values, list items), everything else is ignored. Each dict is
    passed to *filter*; a ``KeyError`` raised by *filter* counts as "no
    match", so predicates may index keys that not every node has. Matching
    dicts are passed through *transform* and the result is collected unless
    it is falsy. A matching dict is reported before matches found inside it.

    The parameter name ``filter`` shadows the builtin; it is kept so that
    existing keyword callers continue to work.

    Returns a list of the transformed matches in traversal order.
    """
    results = []

    if isinstance(data, dict):
        try:
            matched = filter(data)
        except KeyError:
            matched = False
        if matched:
            transformed = transform(data)
            if transformed:
                results.append(transformed)
        children = data.values()
    elif isinstance(data, list):
        children = data
    else:
        return results

    for child in children:
        results.extend(recursive_query(child, filter, transform))
    return results


def _unique(values):
    """Return the hashable *values* without duplicates, first-seen order kept."""
    return list(dict.fromkeys(values))


def extract_forward_declarations(data):
    """Return all ``STRUCT_DECL`` nodes whose ``is_definition`` equals False."""
    def is_forward_declaration(item):
        # "== False" on purpose: keeps the original comparison semantics.
        return (item["is_definition"] == False  # noqa: E712
                and item["kind"] == "CursorKind.STRUCT_DECL")

    return recursive_query(data, is_forward_declaration)


def extract_functions(data, function_names):
    """Return all ``FUNCTION_DECL`` nodes whose name is in *function_names*."""
    def is_wanted_function(item):
        return (item["kind"] == "CursorKind.FUNCTION_DECL"
                and item["name"] in function_names)

    return recursive_query(data, is_wanted_function)


def collect_typerefs(data):
    """Return the ``type`` of every ``TYPE_REF`` node in *data*, without dups.

    The result keeps first-seen order so that generated output is stable
    between runs.
    """
    def is_typeref(item):
        return item["kind"] == "CursorKind.TYPE_REF"

    return _unique(recursive_query(data, is_typeref, lambda item: item["type"]))


def extract_vars(data, var_names):
    """Return all ``VAR_DECL`` nodes whose name is in *var_names*."""
    def is_wanted_var(item):
        return (item["kind"] == "CursorKind.VAR_DECL"
                and item["name"] in var_names)

    return recursive_query(data, is_wanted_var)


def extract_types(data):
    """Return the names of all typedef, enum and struct declarations.

    Declarations with an empty name are omitted (recursive_query drops
    falsy transform results).
    """
    type_kinds = ("CursorKind.TYPEDEF_DECL",
                  "CursorKind.ENUM_DECL",
                  "CursorKind.STRUCT_DECL")

    def is_type_decl(item):
        return item["kind"] in type_kinds

    return recursive_query(data, is_type_decl, lambda item: item["name"])


def remove_dup_dicts(arr_of_dicts):
    """Return copies of the dicts in *arr_of_dicts* with duplicates removed.

    Two dicts are duplicates when they hold the same key/value pairs,
    regardless of key order. First-seen order is kept. All values must be
    hashable.
    """
    seen = set()
    unique = []
    for entry in arr_of_dicts:
        key = frozenset(entry.items())
        if key not in seen:
            seen.add(key)
            unique.append(dict(entry))
    return unique


def resolve_typerefs(ast, typeref_names):
    """Resolve every name of *typeref_names* to its declaration in *ast*.

    Returns a tuple ``(types, resolved, notfound)``: the declarations found,
    the names they were found for (same order) and the names for which
    resolve_typeref() returned nothing.
    """
    types = []
    typeref_resolved = []
    typeref_notfound = []

    for typeref_name in typeref_names:
        decl = resolve_typeref(ast, typeref_name)
        if decl:
            types.append(decl)
            typeref_resolved.append(typeref_name)
        else:
            typeref_notfound.append(typeref_name)

    return (types, typeref_resolved, typeref_notfound)


def resolve_typeref(ast, typeref_name):
    """Return the declaration for *typeref_name*, or None.

    Lookup order: enum declaration, struct declaration, typedef (which is
    followed to its underlying declaration, see extract_typedef_decl()).
    """
    return (extract_enum_decl(ast, typeref_name)
            or extract_struct_decl(ast, typeref_name)
            or extract_typedef_decl(ast, typeref_name))


def _extract_unique_decl(ast, kind, name):
    """Return the only node of *kind* whose ``type`` is *name*, else None.

    None is returned both when nothing matches and when the match is
    ambiguous. The original code carried ``assert (False, "...")`` for the
    ambiguous case, which asserts a non-empty tuple and therefore never
    fired; the effective behavior (returning None) is kept.
    NOTE(review): ambiguous matches may be a forward declaration plus its
    definition - consider preferring the definition; confirm intent.
    """
    def is_wanted_decl(item):
        return item["kind"] == kind and item["type"] == name

    matches = recursive_query(ast, is_wanted_decl)
    return matches[0] if len(matches) == 1 else None


def extract_enum_decl(ast, name):
    """Return the single ``ENUM_DECL`` node with type *name*, else None."""
    return _extract_unique_decl(ast, "CursorKind.ENUM_DECL", name)


def extract_struct_decl(ast, name):
    """Return the single ``STRUCT_DECL`` node with type *name*, else None."""
    return _extract_unique_decl(ast, "CursorKind.STRUCT_DECL", name)


def extract_typedef_decl(ast, name):
    """Resolve the typedef *name* to the declaration it finally refers to.

    The last ``TYPEDEF_DECL`` node with type *name* is used. Depending on
    its ``utypekind``:

    * ``"Typedef"``: the typedef named by ``utype`` is resolved recursively,
    * ``"Elaborated"``: the enum or struct declaration named by ``utype`` is
      returned (None if neither is found),
    * anything else: the typedef node itself is returned.

    Returns None when no typedef with that name exists.
    """
    def is_wanted_typedef(item):
        return (item["kind"] == "CursorKind.TYPEDEF_DECL"
                and item["type"] == name)

    matches = recursive_query(ast, is_wanted_typedef)
    if not matches:
        # Guard: without it a missing typedef ends in None["utypekind"].
        return None

    decl = matches[-1]
    underlying_kind = decl.get("utypekind")
    if underlying_kind == "Typedef":
        return extract_typedef_decl(ast, decl["utype"])
    if underlying_kind == "Elaborated":
        return (extract_enum_decl(ast, decl["utype"])
                or extract_struct_decl(ast, decl["utype"]))
    return decl


def find_dup_types(data):
    """Return ``(all_types, dups)`` for the struct/enum declarations in *data*.

    ``all_types`` lists the ``type`` of every ``STRUCT_DECL`` and
    ``ENUM_DECL`` node in traversal order (repetitions included); ``dups``
    lists each type occurring more than once, in first-repetition order.
    """
    def is_struct_or_enum(item):
        return item["kind"] in ("CursorKind.STRUCT_DECL", "CursorKind.ENUM_DECL")

    all_types = recursive_query(data, is_struct_or_enum, lambda item: item["type"])

    seen = set()
    dups = []
    for type_name in all_types:
        if type_name in seen:
            if type_name not in dups:
                dups.append(type_name)
        else:
            seen.add(type_name)

    return (all_types, dups)


def build_cdl(ast, function_names, var_names):
    """Build the CDL dict for the selected functions and variables of *ast*.

    Stage 1 extracts the requested function and variable declarations.
    Stage 2 repeatedly collects the type references of everything gathered
    so far and resolves the ones not seen yet, until no new reference turns
    up. Declarations found land in ``types_resolved``, their names in
    ``typerefs_resolved``; names without declaration in
    ``typerefs_notfound``. ``typerefs_unresolved`` is empty on return.
    ``structs`` and ``enums`` are placeholders that are not filled here.
    """
    cdl = {"functions": extract_functions(ast, function_names),
           "vars": extract_vars(ast, var_names),
           "structs": "",
           "enums": "",
           "types_resolved": [],
           "typerefs_unresolved": [],
           "typerefs_resolved": [],
           "typerefs_notfound": []}

    while True:
        known = set(cdl["typerefs_resolved"]) | set(cdl["typerefs_notfound"])
        pending = [name for name in collect_typerefs(cdl) if name not in known]
        cdl["typerefs_unresolved"] = pending
        if not pending:
            break

        # Every pending name ends up in resolved or notfound, so the set of
        # known names grows each round and the loop terminates.
        types_resolved, typerefs_resolved, notfound = resolve_typerefs(ast, pending)
        cdl["types_resolved"] += types_resolved
        cdl["typerefs_resolved"] += typerefs_resolved
        cdl["typerefs_notfound"] += notfound

    return cdl


def main_new(header_path="data/input/test_data/test_lib.h",
             function_names=(),
             var_names=("var__HS",),
             libclang_path="/opt/local/libexec/llvm-9.0/lib/libclang.dylib"):
    """Parse one header and write its ``.ast``, ``.cdl`` and ``.views`` JSON.

    The header at *header_path* is parsed with includes followed. The
    outputs are written next to the header as
    ``<filename>.ast.json``, ``<filename>.cdl.json`` (see build_cdl()) and
    ``<filename>.views.json`` (forward declarations, type names and their
    count, all/duplicate types of the CDL).

    The defaults reproduce the previously hard-coded selection, except that
    the name sets no longer contain the empty string, which was only there
    because ``{""}`` had been used to spell an empty set.

    Candidate names that used to be toggled here by (un)commenting lines
    (presumably declared in test_lib.h - verify):
      functions: func_void_void func_void_int func_int_void func_int_int
                 func__PS_V func__E_V func_TP_V func_V_TP func_TP_TP
                 func_APS_V func_V_APS func_APS_APS func_TPS_V func_V_TPS
                 func_TPS_TPS
      vars:      var_P var__E var_E var_AE var__PS var_PS var_APS var__CS
                 var_CS var_ACS var__CCS var_CCS var_ACCS var__HS var__NCS
                 var_HS var__NSP var_AHS var__HHS var__NHS var__NNPS var_HHS
                 var__NHS1 var__NNCS var__NENHS var_ASHS var__HS1 var_NEHS1
    Another input that was tried:
      /Users/heck/local-default/include/pEp/pEpEngine.h with out_dir "./"
    """
    ast_parser = ASTParser(libclang_path)

    header = create_header(header_path)
    header["ast"] = ast_parser.parse(header["path"], follow_includes=True)
    write_json(header["ast"], _artifact_path(header, ".ast.json"))

    # VIEWS
    views = {}
    views["forward_declarations"] = extract_forward_declarations(header["ast"])
    views["types"] = extract_types(header["ast"])
    views["types_count"] = len(views["types"])

    # CDL (AST normalizing)
    cdl = build_cdl(header["ast"], set(function_names), set(var_names))

    (views["all_types"], views["dup_types"]) = find_dup_types(cdl)

    header["cdl"] = cdl
    write_json(header["cdl"], _artifact_path(header, ".cdl.json"))

    header["views"] = views
    write_json(header["views"], _artifact_path(header, ".views.json"))


def main():
    """Run the active entry point, main_new(); main_old() is the legacy flow."""
    main_new()


if __name__ == "__main__":
    main()