#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Dump the AST of C header files to JSON, together with a simplified view.

For every configured header the script writes two files next to each other:
``<header>.ast.json`` (whatever ``AST_Parser.parse`` returned) and
``<header>.simple_ast.json`` (only the top-level functions, typedef'd structs
and typedef'd enums, reduced to names, types and values).

AST nodes are plain dicts.  The code below relies on these keys: ``kind``
(a string such as ``"CursorKind.FUNCTION_DECL"``), an optional ``children``
list, and, depending on the kind, ``name``, ``type``, ``result_type`` and
``value``.
"""

import json
import os


def create_paths_list(dirname, filenames):
    """Return ``dirname`` joined with every entry of ``filenames``.

    ``os.path.join`` is used so that ``dirname`` works with or without a
    trailing path separator.
    """
    return [os.path.join(dirname, basename) for basename in filenames]


def read_files(paths):
    """Read every file in ``paths``; return a list of ``read_file`` records."""
    return [read_file(path) for path in paths]


def read_file(path):
    """Read ``path`` as UTF-8 text.

    Returns:
        A dict ``{"path": path, "sourcecode": <file content>}``.
    """
    with open(path, encoding="utf-8") as f:
        file_content = f.read()
    return {"path": path, "sourcecode": file_content}


def write_json(header, key):
    """Serialize ``header[key]`` to ``<header["outpath"]>.<key>.json``.

    The file is overwritten if it already exists.
    """
    outpath = f"{header['outpath']}.{key}.json"
    with open(outpath, "w", encoding="utf-8") as f:
        json.dump(header[key], f, indent=4)


def prepare_header(header, out_dir):
    """Store the output path prefix for ``header`` under ``"outpath"``.

    The prefix is ``out_dir`` joined with the basename of ``header["path"]``.
    ``header`` is modified in place and also returned.
    """
    basename = os.path.basename(header["path"])
    header["outpath"] = os.path.join(out_dir, basename)
    return header


def create_simple_ast(ast):
    """Reduce a full AST to its functions, typedef'd structs and enums.

    Returns:
        A dict with the keys ``"functions"``, ``"structs"`` and ``"enums"``
        (in that order), each holding a list of simplified entries.
    """
    return {
        "functions": extract_functions_from_ast(ast),
        "structs": extract_structs_from_ast(ast),
        "enums": extract_enums_from_ast(ast),
    }


def _pick_children(node, kind, keys):
    """Return ``{key: child[key] for key in keys}`` for children of ``kind``.

    A node without a ``"children"`` entry yields an empty list.
    """
    return [
        {key: child[key] for key in keys}
        for child in node.get("children", [])
        if child["kind"] == kind
    ]


def _typedef_members(ast, kind):
    """Yield ``(typename, member)`` for typedef children of the given kind.

    Only typedefs that are direct children of ``ast`` are visited;
    ``typename`` is the ``"type"`` entry of the enclosing typedef node.
    """
    for typedef in ast.get("children", []):
        if typedef["kind"] != "CursorKind.TYPEDEF_DECL":
            continue
        typename = typedef["type"]
        for member in typedef.get("children", []):
            if member["kind"] == kind:
                yield typename, member


# extracts top level functions only (is there anything else in C?)
def extract_functions_from_ast(ast):
    """Return simplified entries for every top-level function declaration.

    An AST root without ``"children"`` produces an empty list instead of
    raising ``KeyError``.
    """
    return [
        simple_ast_functions(child)
        for child in ast.get("children", [])
        if child["kind"] == "CursorKind.FUNCTION_DECL"
    ]


def simple_ast_functions(func_ast):
    """Reduce a function node to its name, return type and parameters.

    Returns:
        ``{"name": ..., "return_type": ..., "arguments": [...]}`` where each
        argument is ``{"name": ..., "type": ...}``.  Children that are not
        parameter declarations are ignored.
    """
    return {
        "name": func_ast["name"],
        "return_type": func_ast["result_type"],
        "arguments": _pick_children(
            func_ast, "CursorKind.PARM_DECL", ("name", "type")
        ),
    }


# only typedef enums
def extract_enums_from_ast(ast):
    """Return simplified entries for every enum declared inside a typedef."""
    return [
        simple_ast_enums(enum, typename)
        for typename, enum in _typedef_members(ast, "CursorKind.ENUM_DECL")
    ]


def simple_ast_enums(enum_ast, typename):
    """Reduce an enum node to ``{"name": typename, "items": [...]}``.

    Each item is ``{"name": ..., "value": ...}`` taken from the enum's
    constant declarations.
    """
    return {
        "name": typename,
        "items": _pick_children(
            enum_ast, "CursorKind.ENUM_CONSTANT_DECL", ("name", "value")
        ),
    }


# only typedef structs
def extract_structs_from_ast(ast):
    """Return simplified entries for every struct declared inside a typedef."""
    return [
        simple_ast_structs(struct, typename)
        for typename, struct in _typedef_members(ast, "CursorKind.STRUCT_DECL")
    ]


def simple_ast_structs(struct_ast, typename):
    """Reduce a struct node to ``{"name": typename, "fields": [...]}``.

    Each field is ``{"name": ..., "type": ...}`` taken from the struct's
    field declarations.
    """
    return {
        "name": typename,
        "fields": _pick_children(
            struct_ast, "CursorKind.FIELD_DECL", ("name", "type")
        ),
    }


def main():
    """Parse the configured headers and write their JSON dumps.

    Blocks on one line of standard input before doing any work.
    """
    # Imported here rather than at module level so the pure helper functions
    # above stay importable on machines without the project's parser module.
    from ast_parser import AST_Parser

    # NOTE(review): waits for a line on stdin and discards it; presumably a
    # pause for attaching a debugger. Kept as is, confirm whether it is
    # still wanted.
    input()
    parser = AST_Parser("/opt/local/libexec/llvm-9.0/lib/libclang.dylib")

    # Input
    prefix = r"/Users/heck/local-default/"
    filenames = ["pEpEngine.h", "keymanagement.h"]

    # Output
    out_dir = "data/output/"
    # exist_ok avoids the check-then-create race of a separate isdir() test.
    os.makedirs(out_dir, exist_ok=True)

    in_dir = os.path.join(prefix, "include", "pEp")
    paths = create_paths_list(in_dir, filenames)
    headers = read_files(paths)

    for header in headers:
        header = prepare_header(header, out_dir)

        print(f"processing path: {header['path']}...")
        header["ast"] = parser.parse(header["path"], header["sourcecode"])
        write_json(header, "ast")

        header["simple_ast"] = create_simple_ast(header["ast"])
        write_json(header, "simple_ast")


if __name__ == "__main__":
    main()