From 5d61a317b69af67abd73ae6e9c4c567e5d180898 Mon Sep 17 00:00:00 2001 From: heck Date: Thu, 10 Dec 2020 01:46:41 +0100 Subject: [PATCH] class AST_Parser (returns a python data structure representing the AST of C source code) --- gen/extract.py | 111 +++++++++++++++++++++++++------------------------ 1 file changed, 57 insertions(+), 54 deletions(-) diff --git a/gen/extract.py b/gen/extract.py index 5ccb6cc..eccda2d 100755 --- a/gen/extract.py +++ b/gen/extract.py @@ -1,13 +1,65 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -import glob -import re import os import json import clang.cindex from clang.cindex import CursorKind +class AST_Parser: + def __init__(self,library_file=None): + if not clang.cindex.Config.loaded: + print("Using libclang from: %s", library_file) + clang.cindex.Config.set_library_file(library_file) + + def parse(self, filename, content): + index = clang.cindex.Index.create() + arguments = ["-x", "c"] + options = clang.cindex.TranslationUnit.PARSE_SKIP_FUNCTION_BODIES + content = [(filename, content)] + translation_unit = index.parse(filename, unsaved_files=content, args=arguments, options=options) + ret = self._parse(translation_unit.cursor, filename) + return ret + + def _get_children_filelocal(self, cursor, path): + return [c for c in cursor.get_children() if c.location.file and c.location.file.name == path] + + def _parse(self, cursor, path): + item = {} + excluded_cursortypes = [CursorKind.INTEGER_LITERAL] + if not cursor.kind in excluded_cursortypes: + if not str(cursor.kind) == "": + item["kind"] = str(cursor.kind) + + if not cursor.spelling == "": + item["name"] = cursor.spelling + + if not cursor.displayname == "": + item["displayname"] = cursor.displayname + + if not cursor.type.spelling == "": + item["type"] = cursor.type.spelling + + if not cursor.result_type.spelling == "": + item["result_type"] = cursor.result_type.spelling + + if cursor.kind == CursorKind.ENUM_CONSTANT_DECL: + item["value"] = cursor.enum_value + + child_cursors = self._get_children_filelocal(cursor, path) + if len(child_cursors) > 0: + child_arr = [] + for child_cursor in child_cursors: + child_result = self._parse(child_cursor, path) + if child_result: + child_arr.append(child_result) + + if child_arr: + item["children"] = child_arr + + return item + + def create_paths_list(dirname, filenames): paths = [] for basename in filenames: @@ -28,7 +80,7 @@ def read_file(path): with open(path) as f: file_content = f.read() item = {"path": path, - "content_orig": file_content} + "sourcecode": file_content} return item @@ -45,58 +97,9 @@ def prepare_header(header, out_dir): return header -## PARSE -def clang_parse(filename, content): - index = clang.cindex.Index.create() - - arguments = ["-x", "c"] - # arguments = ["-x", "c++", "-D__CODEGEN__"] - options = clang.cindex.TranslationUnit.PARSE_SKIP_FUNCTION_BODIES - - content = [(filename, content)] - translation_unit = index.parse(filename, unsaved_files=content, args=arguments, options=options) - - ret = heckparse(translation_unit.cursor, filename) - return ret - - -def get_children_filelocal(cursor, path): - return [c for c in cursor.get_children() if c.location.file and c.location.file.name == path] - - -def heckparse(cursor, path): - item = {} - item["type"] = str(cursor.kind) - item["name"] = cursor.spelling - if cursor.kind == CursorKind.FUNCTION_DECL: - item["proto"] = cursor.displayname - if cursor.kind == CursorKind.PARM_DECL: - item["c_type"] = str(cursor.type.spelling) - - if cursor.kind == CursorKind.ENUM_CONSTANT_DECL: - item["value"] = cursor.enum_value - - children = get_children_filelocal(cursor, path) - if len(children) > 0: - childy = [] - for child in children: - childy.append(heckparse(child, path)) - - item["children"] = childy - - return item - - -## INIT -def init_libclang(library_file=None): - if not clang.cindex.Config.loaded: - print("Using libclang from: %s", library_file) - clang.cindex.Config.set_library_file(library_file) - - def main(): input() - init_libclang("/opt/local/libexec/llvm-9.0/lib/libclang.dylib") + parser = AST_Parser("/opt/local/libexec/llvm-9.0/lib/libclang.dylib") # Input prefix = r"/Users/heck/local-default/" filenames = ["pEpEngine.h", @@ -113,7 +116,7 @@ def main(): for header in headers: header = prepare_header(header, out_dir) print("processing path: " + header.get("path") + "...") - header["content_parsed"] = clang_parse(header["path"],header["content_orig"]) + header["AST"] = parser.parse(header["path"], header["sourcecode"]) write_json(header)