#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Parse C headers with libclang and dump each header's cursor tree as JSON.

NOTE(review): ``main`` calls ``create_paths_list`` and ``read_files``; their
bodies are not visible in this part of the file and are assumed to be defined
elsewhere in the module.
"""
import glob
import json
import os
import re


def _cindex():
    """Return the ``clang.cindex`` module, importing it on first use.

    The import is deferred so that the helpers which do not touch libclang
    (``read_file``, ``prepare_header``, ``write_json``,
    ``get_children_filelocal``) stay importable when the bindings are absent.

    Raises:
        ImportError: if the libclang Python bindings are not installed.
    """
    import clang.cindex

    return clang.cindex


def read_file(path):
    """Read the file at *path* and return it as a header record.

    Returns:
        A dict with the keys ``"path"`` (the given path) and
        ``"content_orig"`` (the file's text).
    """
    with open(path) as f:
        file_content = f.read()
    return {"path": path, "content_orig": file_content}


def write_json(header):
    """Serialize *header* as indented JSON to ``header["outpath"] + ".json"``.

    Side effect: ``header["outpath"]`` is updated in place to the path that
    was actually written, and that updated value is part of the dumped JSON.
    """
    header["outpath"] += ".json"
    # Write-only access is enough; the file is never read back here.
    with open(header["outpath"], "w") as f:
        json.dump(header, f, indent=4)


def prepare_header(header, out_dir):
    """Attach the output path for *header* inside *out_dir*.

    The output path is *out_dir* joined with the basename of
    ``header["path"]``; the ``.json`` suffix is added later by ``write_json``.

    Args:
        header: header record containing at least ``"path"``.
        out_dir: output directory, with or without a trailing separator.

    Returns:
        The same *header* dict, with ``"outpath"`` set.

    Raises:
        KeyError: if *header* has no ``"path"`` entry.
    """
    basename = os.path.basename(header["path"])
    # os.path.join inserts the separator only when out_dir lacks one, so
    # "data" and "data/" both yield "data/<basename>".
    header["outpath"] = os.path.join(out_dir, basename)
    return header


# --- Parse ------------------------------------------------------------------
def clang_parse(filename, content):
    """Parse *content* as C source named *filename* and return its tree.

    Args:
        filename: name under which libclang sees the source; also used to
            filter the resulting cursors to those located in this file.
        content: the source text, handed to libclang as an unsaved file.

    Returns:
        The nested dict produced by ``heckparse`` for the translation unit's
        root cursor.
    """
    cindex = _cindex()
    index = cindex.Index.create()
    # Input is parsed as C. (The original kept
    # ["-x", "c++", "-D__CODEGEN__"] around as a commented-out alternative.)
    arguments = ["-x", "c"]
    options = cindex.TranslationUnit.PARSE_SKIP_FUNCTION_BODIES

    # Pass the already-loaded text instead of shadowing the ``content`` arg.
    unsaved_files = [(filename, content)]
    translation_unit = index.parse(
        filename,
        unsaved_files=unsaved_files,
        args=arguments,
        options=options,
    )
    return heckparse(translation_unit.cursor, filename)


def get_children_filelocal(cursor, path):
    """Return the direct children of *cursor* that are located in *path*.

    Children without a file location, or located in any other file, are
    dropped.
    """
    return [
        child
        for child in cursor.get_children()
        if child.location.file and child.location.file.name == path
    ]


def heckparse(cursor, path):
    """Recursively convert *cursor* into a JSON-serializable dict.

    Every node gets ``"type"`` (``str(cursor.kind)``) and ``"name"``
    (``cursor.spelling``). Depending on the cursor kind it also gets:

    * ``"proto"``  - ``cursor.displayname`` for function declarations,
    * ``"c_type"`` - the spelled type for parameter declarations,
    * ``"value"``  - ``cursor.enum_value`` for enum constants.

    ``"children"`` is present only when the cursor has at least one child
    located in *path*.
    """
    kinds = _cindex().CursorKind

    item = {"type": str(cursor.kind), "name": cursor.spelling}
    if cursor.kind == kinds.FUNCTION_DECL:
        item["proto"] = cursor.displayname
    elif cursor.kind == kinds.PARM_DECL:
        item["c_type"] = str(cursor.type.spelling)
    elif cursor.kind == kinds.ENUM_CONSTANT_DECL:
        item["value"] = cursor.enum_value

    children = get_children_filelocal(cursor, path)
    if children:
        item["children"] = [heckparse(child, path) for child in children]

    return item


# --- Init -------------------------------------------------------------------
def init_libclang(library_file=None):
    """Point the clang bindings at *library_file* unless libclang is loaded.

    Args:
        library_file: path to the libclang shared library. When ``None`` no
            override is installed and the bindings use their own lookup.
    """
    cindex = _cindex()
    if cindex.Config.loaded or library_file is None:
        return
    print("Using libclang from: %s" % library_file)
    cindex.Config.set_library_file(library_file)


def main():
    """Parse the configured headers and write one JSON file per header.

    NOTE(review): the libclang location, install prefix and header list are
    hard-coded, machine-specific values. ``create_paths_list`` and
    ``read_files`` are not visible here and are assumed to be defined
    elsewhere in this module.
    """
    init_libclang("/opt/local/libexec/llvm-9.0/lib/libclang.dylib")

    # Input
    prefix = r"/Users/heck/local-default/"
    filenames = ["pEpEngine.h", "keymanagement.h"]

    # Output
    out_dir = "data/output/"
    os.makedirs(out_dir, exist_ok=True)

    in_dir = prefix + r"include/pEp/"
    paths = create_paths_list(in_dir, filenames)
    headers = read_files(paths)

    for header in headers:
        header = prepare_header(header, out_dir)
        print("processing path: " + header.get("path") + "...")
        header["content_parsed"] = clang_parse(header["path"], header["content_orig"])
        write_json(header)