Browse Source

class AST_Parser (returns a Python data structure representing the AST of C source code)

master
heck 5 years ago
parent
commit
5d61a317b6
  1. 111
      gen/extract.py

111
gen/extract.py

@ -1,13 +1,65 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import glob
import re
import os
import json
import clang.cindex
from clang.cindex import CursorKind
class AST_Parser:
    """Parse C source code with libclang into plain Python dicts and lists.

    Each AST node becomes a dict that may carry the keys ``kind``, ``name``,
    ``displayname``, ``type``, ``result_type``, ``value`` (enum constants
    only) and ``children`` (a list of nested node dicts).
    """

    def __init__(self, library_file=None):
        """Point the clang bindings at a libclang shared library.

        Args:
            library_file: Path to the libclang library. When ``None``, the
                bindings are left to locate the library themselves.

        Nothing is configured if the bindings have already loaded a library.
        """
        if library_file is not None and not clang.cindex.Config.loaded:
            # print() does not interpolate "%s" on its own; format explicitly.
            print("Using libclang from: %s" % (library_file,))
            clang.cindex.Config.set_library_file(library_file)

    def parse(self, filename, content):
        """Parse ``content`` as C source and return its AST as a nested dict.

        Args:
            filename: Name under which the source is presented to clang; only
                cursors located in this file are kept in the result.
            content: The source text, passed to clang as an unsaved file.

        Returns:
            The dict produced by ``_parse`` for the translation unit's root
            cursor.
        """
        index = clang.cindex.Index.create()
        arguments = ["-x", "c"]
        options = clang.cindex.TranslationUnit.PARSE_SKIP_FUNCTION_BODIES
        unsaved_files = [(filename, content)]
        translation_unit = index.parse(
            filename,
            unsaved_files=unsaved_files,
            args=arguments,
            options=options,
        )
        return self._parse(translation_unit.cursor, filename)

    def _get_children_filelocal(self, cursor, path):
        """Return the direct children of ``cursor`` whose location file is ``path``.

        Children without a location file are dropped as well.
        """
        return [
            child
            for child in cursor.get_children()
            if child.location.file and child.location.file.name == path
        ]

    def _parse(self, cursor, path):
        """Recursively convert ``cursor`` and its file-local children to a dict.

        Cursors of an excluded kind produce an empty dict; empty child results
        are left out of the parent's ``children`` list.
        """
        excluded_cursortypes = [CursorKind.INTEGER_LITERAL]
        if cursor.kind in excluded_cursortypes:
            return {}

        item = {}
        attributes = (
            ("kind", str(cursor.kind)),
            ("name", cursor.spelling),
            ("displayname", cursor.displayname),
            ("type", cursor.type.spelling),
            ("result_type", cursor.result_type.spelling),
        )
        # Only non-empty attributes are recorded, to keep the output compact.
        for key, value in attributes:
            if value != "":
                item[key] = value

        if cursor.kind == CursorKind.ENUM_CONSTANT_DECL:
            item["value"] = cursor.enum_value

        children = []
        for child_cursor in self._get_children_filelocal(cursor, path):
            child_result = self._parse(child_cursor, path)
            if child_result:
                children.append(child_result)
        if children:
            item["children"] = children

        return item
def create_paths_list(dirname, filenames):
paths = []
for basename in filenames:
@ -28,7 +80,7 @@ def read_file(path):
with open(path) as f:
file_content = f.read()
item = {"path": path,
"content_orig": file_content}
"sourcecode": file_content}
return item
@ -45,58 +97,9 @@ def prepare_header(header, out_dir):
return header
## PARSE
def clang_parse(filename, content):
    """Parse ``content`` as C source with libclang and return the converted AST.

    The text is handed to clang as an unsaved file named ``filename``; the
    root cursor of the resulting translation unit is converted by
    ``heckparse``.
    """
    # Alternative flags for C++ input: ["-x", "c++", "-D__CODEGEN__"]
    unsaved = [(filename, content)]
    unit = clang.cindex.Index.create().parse(
        filename,
        unsaved_files=unsaved,
        args=["-x", "c"],
        options=clang.cindex.TranslationUnit.PARSE_SKIP_FUNCTION_BODIES,
    )
    return heckparse(unit.cursor, filename)
def get_children_filelocal(cursor, path):
    """Collect the direct children of ``cursor`` that are located in ``path``.

    A child is kept only when it has a location file and that file's name
    equals ``path``.
    """
    local_children = []
    for child in cursor.get_children():
        source_file = child.location.file
        if not source_file:
            continue
        if source_file.name == path:
            local_children.append(child)
    return local_children
def heckparse(cursor, path):
    """Recursively turn ``cursor`` into a dict describing it and its children.

    Every node carries ``type`` (the cursor kind as a string) and ``name``.
    Function declarations add ``proto``, parameter declarations add
    ``c_type`` and enum constants add ``value``. Children located in
    ``path`` are converted the same way and stored under ``children``.
    """
    kind = cursor.kind
    node = {"type": str(kind), "name": cursor.spelling}

    if kind == CursorKind.FUNCTION_DECL:
        node["proto"] = cursor.displayname
    if kind == CursorKind.PARM_DECL:
        node["c_type"] = str(cursor.type.spelling)
    if kind == CursorKind.ENUM_CONSTANT_DECL:
        node["value"] = cursor.enum_value

    nested = get_children_filelocal(cursor, path)
    if nested:
        node["children"] = [heckparse(entry, path) for entry in nested]
    return node
## INIT
def init_libclang(library_file=None):
    """Point the clang bindings at a libclang shared library.

    Args:
        library_file: Path to the libclang library. When ``None``, the
            bindings are left to locate the library themselves.

    Nothing is configured if the bindings have already loaded a library.
    """
    if library_file is not None and not clang.cindex.Config.loaded:
        # print() does not interpolate "%s" on its own; format explicitly.
        print("Using libclang from: %s" % (library_file,))
        clang.cindex.Config.set_library_file(library_file)
def main():
input()
init_libclang("/opt/local/libexec/llvm-9.0/lib/libclang.dylib")
parser = AST_Parser("/opt/local/libexec/llvm-9.0/lib/libclang.dylib")
# Input
prefix = r"/Users/heck/local-default/"
filenames = ["pEpEngine.h",
@ -113,7 +116,7 @@ def main():
for header in headers:
header = prepare_header(header, out_dir)
print("processing path: " + header.get("path") + "...")
header["content_parsed"] = clang_parse(header["path"],header["content_orig"])
header["AST"] = parser.parse(header["path"], header["sourcecode"])
write_json(header)

Loading…
Cancel
Save