# -*- coding: utf-8 -*-
import collections
import json
import os

from . import ast_parser


def join_dir_and_filenames(dirname, filenames):
    """Return one path per entry of *filenames*, each joined onto *dirname*.

    ``os.path.join`` is used so that *dirname* works with or without a
    trailing path separator.
    """
    return [os.path.join(dirname, basename) for basename in filenames]


def read_file(path):
    """Return the entire content of the text file at *path*."""
    with open(path) as f:
        return f.read()


# out-dir is in-dir if not specified
def create_header(path, out_dir=None):
    """Build the header record (a dict) for the source file at *path*.

    The record has the keys ``path``, ``dir``, ``filename``, ``out_dir`` and
    ``sourcecode``.  ``out_dir`` falls back to the directory of *path* when
    *out_dir* is ``None`` or empty.  The file is read immediately, so errors
    raised by ``open`` propagate to the caller.
    """
    in_dir = os.path.dirname(path)
    return {
        "path": path,
        "dir": in_dir,
        "filename": os.path.basename(path),
        "out_dir": out_dir if out_dir else in_dir,
        "sourcecode": read_file(path),
    }


def write_json(content, outpath):
    """Serialize *content* as JSON (indent 4) into the file *outpath*.

    The parent directory of *outpath* is created when it does not exist.
    """
    out_dir = os.path.dirname(outpath)
    # A bare filename has an empty dirname; os.makedirs("") would raise,
    # and the current directory needs no creation anyway.
    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    with open(outpath, "w+") as f:
        json.dump(content, f, indent=4)


# works on valid json like structure lists/dict
def recursive_query(data, filter, transform=lambda x: x):
    """Collect transformed dicts that match *filter* from nested lists/dicts.

    *data* may be of any type; only dicts are tested against *filter*, and
    only dicts and lists are descended into.  A node is visited before its
    children.

    Args:
        data: the structure to search.
        filter: callable taking a dict and returning a truthy value for a
            match.  A ``KeyError`` raised by it counts as "no match", so
            filters may index keys that are absent on some nodes.
        transform: callable applied to every matching dict; its result is
            collected unless it is falsy.

    Returns:
        A list with the collected transform results.
    """
    resultset = []

    if isinstance(data, dict):
        try:
            matched = filter(data)
        except KeyError:
            # The node lacks a key the filter looks at: treat as no match.
            matched = False
        if matched:
            xformed = transform(data)
            if xformed:
                resultset.append(xformed)
        children = data.values()
    elif isinstance(data, list):
        children = data
    else:
        # Scalars can neither match nor contain anything.
        return resultset

    for item in children:
        resultset += recursive_query(item, filter, transform)
    return resultset


def extract_forward_declarations(data):
    """Return all ``CursorKind.STRUCT_DECL`` nodes in *data* that are not definitions."""

    def is_forward_decl(item):
        return (
            item["is_definition"] == False  # noqa: E712 - keep equality semantics
            and item["kind"] == "CursorKind.STRUCT_DECL"
        )

    return recursive_query(data, is_forward_decl)


def extract_functions(data, function_names):
    """Return the ``CursorKind.FUNCTION_DECL`` nodes whose name is in *function_names*."""

    def is_wanted_function(item):
        return (
            item["kind"] == "CursorKind.FUNCTION_DECL"
            and item["name"] in function_names
        )

    return recursive_query(data, is_wanted_function)


# just the typenames of all typerefs no dups
def collect_typerefs(data):
    """Return the ``type`` of every ``CursorKind.TYPE_REF`` node in *data*.

    Duplicates are removed; names are returned in first-seen order so that
    the result is reproducible between runs.
    """

    def is_typeref(item):
        return item["kind"] == "CursorKind.TYPE_REF"

    def type_of(item):
        return item["type"]

    seen = set()
    no_dups = []
    for name in recursive_query(data, is_typeref, type_of):
        if name not in seen:
            seen.add(name)
            no_dups.append(name)
    return no_dups


def extract_vars(data, var_names):
    """Return the ``CursorKind.VAR_DECL`` nodes whose name is in *var_names*."""

    def is_wanted_var(item):
        return (
            item["kind"] == "CursorKind.VAR_DECL"
            and item["name"] in var_names
        )

    return recursive_query(data, is_wanted_var)


def extract_types(data):
    """Return the names of all typedef, enum and struct declarations in *data*.

    Declarations with an empty ``name`` are not included, because
    ``recursive_query`` drops falsy transform results.
    """
    type_decl_kinds = (
        "CursorKind.TYPEDEF_DECL",
        "CursorKind.ENUM_DECL",
        "CursorKind.STRUCT_DECL",
    )

    def is_type_decl(item):
        return item["kind"] in type_decl_kinds

    def name_of(item):
        return item["name"]

    return recursive_query(data, is_type_decl, name_of)


def remove_dup_dicts(arr_of_dicts):
    """Return copies of the dicts in *arr_of_dicts* without duplicates.

    Two dicts are duplicates when their ``items()`` sequences are equal.
    All values must be hashable.  The first occurrence of each dict is kept
    and the original order is preserved.
    """
    seen = set()
    arr_no_dups = []
    for d in arr_of_dicts:
        key = tuple(d.items())
        if key not in seen:
            seen.add(key)
            arr_no_dups.append(dict(key))
    return arr_no_dups


def resolve_typerefs(ast, typeref_names):
    """Resolve every name in *typeref_names* against *ast*.

    Returns:
        A tuple ``(types, typeref_resolved, typeref_notfound)``: the resolved
        declaration nodes, the names that were resolved (same order as
        ``types``) and the names for which nothing was found.
    """
    types = []
    typeref_resolved = []
    typeref_notfound = []
    for typeref_name in typeref_names:
        res = resolve_typeref(ast, typeref_name)
        if res:
            types.append(res)
            typeref_resolved.append(typeref_name)
        else:
            typeref_notfound.append(typeref_name)
    return (types, typeref_resolved, typeref_notfound)


def resolve_typeref(ast, typeref_name):
    """Return the declaration for *typeref_name*, or ``None`` if not found.

    Lookup order: enum declaration, then struct declaration, then typedef.
    """
    return (
        extract_enum_decl(ast, typeref_name)
        or extract_struct_decl(ast, typeref_name)
        or extract_typedef_decl(ast, typeref_name)
    )


def _extract_unique_decl(ast, kind, name):
    """Return the single node of *kind* whose ``type`` equals *name*, else ``None``.

    Raises:
        AssertionError: if more than one such node exists.  Raised explicitly
            (instead of via ``assert``) so the check survives ``python -O``.
    """

    def is_match(data):
        return data["kind"] == kind and data["type"] == name

    res = recursive_query(ast, is_match)
    if len(res) > 1:
        raise AssertionError("duplicate definition")
    return res[0] if res else None


def extract_enum_decl(ast, name):
    """Return the ``CursorKind.ENUM_DECL`` node with ``type`` *name*, or ``None``.

    Raises:
        AssertionError: if *ast* contains more than one such node.
    """
    return _extract_unique_decl(ast, "CursorKind.ENUM_DECL", name)


def extract_struct_decl(ast, name):
    """Return the ``CursorKind.STRUCT_DECL`` node with ``type`` *name*, or ``None``.

    Raises:
        AssertionError: if *ast* contains more than one such node.
    """
    return _extract_unique_decl(ast, "CursorKind.STRUCT_DECL", name)


def extract_typedef_decl(ast, name):
    """Resolve the typedef with ``type`` *name* to the declaration behind it.

    When several typedef nodes match, the last one found is used.  Depending
    on its ``utypekind``:

    * ``"Typedef"``: the lookup continues with the typedef named by ``utype``.
    * ``"Elaborated"``: the enum or struct declaration named by ``utype`` is
      returned (``None`` if neither exists).
    * anything else: the typedef node itself is returned.

    Returns ``None`` when no typedef matches *name*.
    """

    def is_match(data):
        return (
            data["kind"] == "CursorKind.TYPEDEF_DECL"
            and data["type"] == name
        )

    res = recursive_query(ast, is_match)
    if not res:
        return None

    ret = res[-1]
    if ret["utypekind"] == "Typedef":
        return extract_typedef_decl(ast, ret["utype"])
    if ret["utypekind"] == "Elaborated":
        return (
            extract_enum_decl(ast, ret["utype"])
            or extract_struct_decl(ast, ret["utype"])
        )
    return ret


def find_dup_types(data):
    """Find struct/enum types that are declared more than once in *data*.

    Returns:
        A tuple ``(all_types, dups)``: the ``type`` of every struct and enum
        declaration found (with repetitions), and the list of types that
        occur more than once (each listed once).
    """
    decl_kinds = ("CursorKind.STRUCT_DECL", "CursorKind.ENUM_DECL")

    def is_decl(item):
        return item["kind"] in decl_kinds

    def type_of(item):
        return item["type"]

    all_types = recursive_query(data, is_decl, type_of)
    # Count once instead of calling list.count() per element (quadratic).
    counts = collections.Counter(all_types)
    dups = [type_name for type_name, count in counts.items() if count > 1]
    return (all_types, dups)


def parse(libclang_path, function_names, header_filename, var_names):
    """Parse *header_filename* and extract the requested functions and vars.

    Args:
        libclang_path: passed to ``ast_parser.ASTParser``.
        function_names: names of the function declarations to extract.
        header_filename: path of the header file to parse.
        var_names: names of the variable declarations to extract.

    Returns:
        The header record from ``create_header`` extended with the keys
        ``ast`` (the parser output), ``cid`` (the extracted functions, vars
        and the types they reference, resolved transitively) and ``views``
        (summaries: forward declarations, type names, duplicate types).
    """
    astp = ast_parser.ASTParser(libclang_path)
    # e.g. create_header("/Users/heck/local-default/include/pEp/pEpEngine.h", out_dir="./")
    header = create_header(header_filename)
    header["ast"] = astp.parse(header["path"], follow_includes=True)

    # VIEWS
    views = {}
    views["forward_declarations"] = extract_forward_declarations(header["ast"])
    views["types"] = extract_types(header["ast"])
    views["types_count"] = len(views["types"])

    # CDL
    cid = {"functions": "", "vars": "", "structs": "", "enums": ""}

    # stage 1: extract functions and vars
    cid["functions"] = extract_functions(header["ast"], function_names)
    cid["vars"] = extract_vars(header["ast"], var_names)

    # stage 2: collect type refs
    cid["types_resolved"] = []
    cid["typerefs_unresolved"] = []
    cid["typerefs_resolved"] = []
    cid["typerefs_notfound"] = []

    # Resolved types are stored inside cid, so each pass also picks up the
    # type refs of the types resolved in the previous pass.  Every pass moves
    # all pending names to "resolved" or "notfound", so the loop terminates.
    while True:
        already_handled = set(cid["typerefs_resolved"]).union(cid["typerefs_notfound"])
        # only list of typenames
        cid["typerefs_unresolved"] = [
            name for name in collect_typerefs(cid) if name not in already_handled
        ]
        if not cid["typerefs_unresolved"]:
            break

        (types_resolved, typerefs_resolved, notfound) = resolve_typerefs(
            header["ast"], cid["typerefs_unresolved"]
        )
        cid["types_resolved"] += types_resolved
        cid["typerefs_resolved"] += typerefs_resolved
        cid["typerefs_notfound"] += notfound

    header["cid"] = cid
    header["views"] = views

    (views["all_types"], views["dup_types"]) = find_dup_types(header["cid"])
    (views["ast_all_types"], views["ast_dup_types"]) = find_dup_types(header["ast"])

    return header


# generates simple-ast for each header specified in spec out dir.
# def main_old():
#     parser = ASTParser("/opt/local/libexec/llvm-9.0/lib/libclang.dylib")
#
#     in_dir = r"/Users/heck/local-default/include/pEp/"
#     filenames = ["pEpEngine.h",
#                  "keymanagement.h",
#                  "message_api.h",
#                  "message.h",
#                  "sync_api.h",
#                  "key_reset.h",
#                  "Identity.h",
#                  "Rating.h"]
#
#     out_dir = "data/output"
#
#     paths = join_dir_and_filenames(in_dir, filenames)
#
#     headers = []
#     for path in paths:
#         headers.append(create_header(path, out_dir))
#
#     for header in headers:
#         print("processing path: " + header["path"] + "...")
#         header["ast"] = parser.parse(header["path"], header["sourcecode"])
#         write_json(header["ast"], header["out_dir"] + "/" + header["filename"] + ".ast.json")
#
#         simpleAst = SimpleAST()
#         header["simple_ast"] = simpleAst.create_simple_ast(header["ast"])
#         write_json(header["simple_ast"], header["out_dir"] + "/" + header["filename"] + ".simple_ast.json")
#
#