#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Extract declarations from a parsed C header AST and dump them as JSON.

The AST is the nested list/dict structure returned by ``ASTParser.parse``.
The helpers in this module query that structure by the ``"kind"``,
``"name"`` and ``"type"`` keys of its dict nodes and write the results
next to the header (or into an explicit output directory).
"""

import os
import json
from ast_parser import ASTParser
from simple_ast import SimpleAST

# Location of the libclang shared library handed to ASTParser.
LIBCLANG_PATH = "/opt/local/libexec/llvm-9.0/lib/libclang.dylib"


def join_dir_and_filenames(dirname, filenames):
    """Return one path per entry of *filenames*, each placed under *dirname*.

    ``os.path.join`` is used so that *dirname* works with or without a
    trailing path separator.
    """
    return [os.path.join(dirname, basename) for basename in filenames]


def read_file(path):
    """Return the complete text content of the file at *path*."""
    with open(path) as f:
        return f.read()


def create_header(path, out_dir=None):
    """Build the header record for the file at *path*.

    The record is a dict with the keys ``path``, ``dir``, ``filename``,
    ``out_dir`` and ``sourcecode``.  ``out_dir`` falls back to the header's
    own directory when *out_dir* is not given (or is empty).  The file is
    read immediately, so errors raised by ``open`` propagate to the caller.
    """
    directory = os.path.dirname(path)
    return {
        "path": path,
        "dir": directory,
        "filename": os.path.basename(path),
        "out_dir": out_dir if out_dir else directory,
        "sourcecode": read_file(path),
    }


def write_json(content, outpath):
    """Serialize *content* as indented JSON to *outpath*.

    Missing parent directories are created first.  A bare filename (no
    directory component) is written to the current working directory.
    """
    out_dir = os.path.dirname(outpath)
    # os.makedirs("") raises FileNotFoundError, so only create a directory
    # when the path actually names one.
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(outpath, "w+") as f:
        json.dump(content, f, indent=4)


def recursive_query(data, filter, transform=lambda x: x):  # noqa: A002
    """Collect transformed dict nodes of a JSON-like structure.

    Walks *data* depth-first through nested dicts (their values) and lists.
    Only dict nodes are tested; any other value is merely traversed or
    ignored.

    Args:
        data: nested structure of dicts, lists and scalar values.
        filter: predicate called with each dict node.  A ``KeyError`` raised
            by the predicate counts as "no match", which lets predicates
            index keys that not every node has.  (The name shadows the
            builtin but is kept because it is part of the signature.)
        transform: called with each matching node; its result is collected
            unless it is falsy.  Exceptions raised here are not caught.

    Returns:
        A list of the collected results, parents before their children.
    """
    resultset = []

    if isinstance(data, dict):
        try:
            matched = filter(data)
        except KeyError:
            matched = False
        if matched:
            xformed = transform(data)
            if xformed:
                resultset.append(xformed)
        children = data.values()
    elif isinstance(data, list):
        children = data
    else:
        children = ()

    for child in children:
        resultset += recursive_query(child, filter, transform)
    return resultset


def _extract_named(data, kind, names):
    """Return all nodes of the given *kind* whose ``"name"`` is in *names*."""
    def matches(item):
        return item["kind"] == kind and item["name"] in names

    return recursive_query(data, matches)


def _extract_unique_decl(ast, kind, name):
    """Return the single node of *kind* whose ``"type"`` equals *name*.

    Returns ``None`` when there is no such node and raises
    ``AssertionError`` when there is more than one.
    """
    def matches(item):
        return item["kind"] == kind and item["type"] == name

    found = recursive_query(ast, matches)
    if len(found) > 1:
        # Raised explicitly (rather than via ``assert``) so the check is
        # not stripped when Python runs with -O.
        raise AssertionError("duplicate definition")
    return found[0] if found else None


def extract_forward_declarations(data):
    """Return all STRUCT_DECL nodes whose ``"is_definition"`` is false."""
    def is_forward_declaration(item):
        # Kept as an equality test so that a None flag is not treated as a
        # forward declaration.
        return (item["is_definition"] == False  # noqa: E712
                and item["kind"] == "CursorKind.STRUCT_DECL")

    return recursive_query(data, is_forward_declaration)


def extract_functions(data, function_names):
    """Return all FUNCTION_DECL nodes whose name is in *function_names*."""
    return _extract_named(data, "CursorKind.FUNCTION_DECL", function_names)


def collect_typerefs(data):
    """Return the ``"type"`` of every TYPE_REF node, without duplicates.

    The names are returned in first-seen order so that repeated runs
    produce the same output.
    """
    def is_typeref(item):
        return item["kind"] == "CursorKind.TYPE_REF"

    results = recursive_query(data, is_typeref, lambda item: item["type"])
    return list(dict.fromkeys(results))


def extract_vars(data, var_names):
    """Return all VAR_DECL nodes whose name is in *var_names*."""
    return _extract_named(data, "CursorKind.VAR_DECL", var_names)


def extract_types(data):
    """Return the names of all TYPEDEF_DECL, ENUM_DECL and STRUCT_DECL nodes.

    Nodes with an empty name are skipped, because ``recursive_query`` drops
    falsy transform results.
    """
    type_kinds = (
        "CursorKind.TYPEDEF_DECL",
        "CursorKind.ENUM_DECL",
        "CursorKind.STRUCT_DECL",
    )

    def is_type_decl(item):
        return item["kind"] in type_kinds

    return recursive_query(data, is_type_decl, lambda item: item["name"])


def remove_dup_dicts(arr_of_dicts):
    """Return copies of the dicts in *arr_of_dicts* with duplicates removed.

    Two dicts count as duplicates when their items are equal and in the same
    order.  All values must be hashable.  The first occurrence of each dict
    is kept, in input order.
    """
    seen = set()
    arr_no_dups = []
    for d in arr_of_dicts:
        key = tuple(d.items())
        if key not in seen:
            seen.add(key)
            arr_no_dups.append(dict(key))
    return arr_no_dups


def resolve_typerefs(ast, typeref_names):
    """Resolve each name in *typeref_names* against *ast*.

    Returns:
        A tuple ``(types, typeref_resolved, typeref_notfound)``: the resolved
        declaration nodes, the names that were resolved, and the names for
        which no declaration was found.
    """
    types = []
    typeref_resolved = []
    typeref_notfound = []

    for typeref_name in typeref_names:
        res = resolve_typeref(ast, typeref_name)
        if not res:
            typeref_notfound.append(typeref_name)
        else:
            types.append(res)
            typeref_resolved.append(typeref_name)

    return (types, typeref_resolved, typeref_notfound)


def resolve_typeref(ast, typeref_name):
    """Return the declaration for *typeref_name*, or a falsy value.

    Enum declarations are tried first, then struct declarations, then
    typedefs.
    """
    return (extract_enum_decl(ast, typeref_name)
            or extract_struct_decl(ast, typeref_name)
            or extract_typedef_decl(ast, typeref_name))


def extract_enum_decl(ast, name):
    """Return the ENUM_DECL node whose type is *name*, or ``None``.

    Raises:
        AssertionError: if more than one such node exists.
    """
    return _extract_unique_decl(ast, "CursorKind.ENUM_DECL", name)


def extract_struct_decl(ast, name):
    """Return the STRUCT_DECL node whose type is *name*, or ``None``.

    Raises:
        AssertionError: if more than one such node exists.
    """
    return _extract_unique_decl(ast, "CursorKind.STRUCT_DECL", name)


def extract_typedef_decl(ast, name):
    """Resolve the typedef *name* to the declaration it refers to.

    The last TYPEDEF_DECL node whose type is *name* is used.  A typedef of
    another typedef (``utypekind == "Typedef"``) is followed recursively; an
    ``"Elaborated"`` one is resolved to the enum or struct declaration named
    by its ``"utype"``.  For any other ``utypekind`` the typedef node itself
    is returned.  Returns ``None`` when no typedef matches.
    """
    def is_typedef(item):
        return (item["kind"] == "CursorKind.TYPEDEF_DECL"
                and item["type"] == name)

    found = recursive_query(ast, is_typedef)
    if not found:
        return None

    typedef = found[-1]
    utypekind = typedef["utypekind"]
    if utypekind == "Typedef":
        return extract_typedef_decl(ast, typedef["utype"])
    if utypekind == "Elaborated":
        return (extract_enum_decl(ast, typedef["utype"])
                or extract_struct_decl(ast, typedef["utype"]))
    return typedef


def find_dup_types(data):
    """Find struct/enum type names that are declared more than once.

    Returns:
        A tuple ``(all_types, dups)``: the ``"type"`` of every STRUCT_DECL
        and ENUM_DECL node in traversal order, and the names that occur more
        than once (each listed once, in first-seen order).
    """
    def is_struct_or_enum(item):
        return (item["kind"] == "CursorKind.STRUCT_DECL"
                or item["kind"] == "CursorKind.ENUM_DECL")

    all_types = recursive_query(data, is_struct_or_enum,
                                lambda item: item["type"])

    # Single counting pass instead of calling list.count() per element.
    counts = {}
    for type_name in all_types:
        counts[type_name] = counts.get(type_name, 0) + 1
    dups = [type_name for type_name, count in counts.items() if count > 1]

    return (all_types, dups)


def _output_path(header, suffix):
    """Return the output file path for *header* with *suffix* appended."""
    # os.path.join keeps the path relative when out_dir is empty, instead of
    # producing a path at the filesystem root.
    return os.path.join(header["out_dir"], header["filename"] + suffix)


def main_new():
    """Parse the test header and write its AST, CDL and view JSON files."""
    ast_parser = ASTParser(LIBCLANG_PATH)

    # Names of the functions to extract.  The set is seeded with an empty
    # string only; uncomment entries below to select functions.
    function_names = {""}
    # function_names.add("func_void_void")
    # function_names.add("func_void_int")
    # function_names.add("func_int_void")
    # function_names.add("func_int_int")
    # function_names.add("func__PS_V")
    # function_names.add("func__E_V")
    # function_names.add("func_TP_V")
    # function_names.add("func_V_TP")
    # function_names.add("func_TP_TP")
    # function_names.add("func_APS_V")
    # function_names.add("func_V_APS")
    # function_names.add("func_APS_APS")
    # function_names.add("func_TPS_V")
    # function_names.add("func_V_TPS")
    # function_names.add("func_V_TPS")
    # function_names.add("func_TPS_TPS")
    # function_names.add("func_TPS_TPS")

    # Names of the variables to extract; toggle entries as needed.
    var_names = {""}
    # var_names.add("var_P")
    # var_names.add("var__E")
    # var_names.add("var_E")
    # var_names.add("var_AE")
    # var_names.add("var__PS")
    # var_names.add("var_PS")
    # var_names.add("var_APS")
    # var_names.add("var__CS")
    # var_names.add("var_CS")
    # var_names.add("var_ACS")
    # var_names.add("var__CCS")
    # var_names.add("var_CCS")
    # var_names.add("var_ACCS")
    var_names.add("var__HS")
    # var_names.add("var__NCS")
    # var_names.add("var_HS")
    # var_names.add("var__NSP")
    # var_names.add("var_AHS")
    # var_names.add("var__HHS")
    # var_names.add("var__NHS")
    # var_names.add("var__NNPS")
    # var_names.add("var_HHS")
    # var_names.add("var__NHS1")
    # var_names.add("var__NNCS")
    # var_names.add("var__NENHS")
    # var_names.add("var_ASHS")
    # var_names.add("var__HS1")
    # var_names.add("var_NEHS1")

    # header = create_header("/Users/heck/local-default/include/pEp/pEpEngine.h", out_dir="./")
    header = create_header("data/input/test_data/test_lib.h")

    header["ast"] = ast_parser.parse(header["path"], follow_includes=True)
    write_json(header["ast"], _output_path(header, ".ast.json"))

    # VIEWS
    views = {}
    views["forward_declarations"] = extract_forward_declarations(header["ast"])
    views["types"] = extract_types(header["ast"])
    views["types_count"] = len(views["types"])

    # CDL
    # AST normalizing
    cdl = {"functions": "", "vars": "", "structs": "", "enums": ""}

    # stage 1: extract functions and vars
    cdl["functions"] = extract_functions(header["ast"], function_names)
    cdl["vars"] = extract_vars(header["ast"], var_names)

    # stage 2: collect type refs
    cdl["types_resolved"] = []
    cdl["typerefs_unresolved"] = []
    cdl["typerefs_resolved"] = []
    cdl["typerefs_notfound"] = []

    # Resolved types can reference further types, so keep collecting and
    # resolving until a pass turns up no typeref that was not handled yet.
    while True:
        already_handled = (set(cdl["typerefs_resolved"])
                           | set(cdl["typerefs_notfound"]))
        # only list of typenames
        cdl["typerefs_unresolved"] = [
            typeref for typeref in collect_typerefs(cdl)
            if typeref not in already_handled
        ]
        if not cdl["typerefs_unresolved"]:
            break

        (types_resolved, typerefs_resolved, notfound) = resolve_typerefs(
            header["ast"], cdl["typerefs_unresolved"])
        cdl["types_resolved"] += types_resolved
        cdl["typerefs_resolved"] += typerefs_resolved
        cdl["typerefs_notfound"] += notfound

    (views["all_types"], views["dup_types"]) = find_dup_types(cdl)
    (views["ast_all_types"], views["ast_dup_types"]) = find_dup_types(header["ast"])

    header["cdl"] = cdl
    write_json(header["cdl"], _output_path(header, ".cdl.json"))

    header["views"] = views
    write_json(header["views"], _output_path(header, ".views.json"))


# generates simple-ast for each header specified in spec out dir.
def main_old():
    """Write the AST and simple-AST JSON for a fixed list of pEp headers."""
    parser = ASTParser(LIBCLANG_PATH)

    in_dir = r"/Users/heck/local-default/include/pEp/"
    filenames = ["pEpEngine.h",
                 "keymanagement.h",
                 "message_api.h",
                 "message.h",
                 "sync_api.h",
                 "key_reset.h",
                 "Identity.h",
                 "Rating.h"]

    out_dir = "data/output"

    paths = join_dir_and_filenames(in_dir, filenames)

    headers = [create_header(path, out_dir) for path in paths]

    for header in headers:
        print("processing path: " + header["path"] + "...")
        header["ast"] = parser.parse(header["path"], header["sourcecode"])
        write_json(header["ast"], _output_path(header, ".ast.json"))

        simpleAst = SimpleAST()
        header["simple_ast"] = simpleAst.create_simple_ast(header["ast"])
        write_json(header["simple_ast"], _output_path(header, ".simple_ast.json"))


def main():
    """Script entry point; runs the current pipeline (``main_new``)."""
    # main_old()
    main_new()


if __name__ == "__main__":
    main()