Browse Source

gen is getting there (simple_ast on the way out)

master
heck 5 years ago
parent
commit
cf319a0639
  1. 72
      gen/ast_parser.py
  2. 255
      gen/extract.py
  3. 144
      gen/simple_ast.py

72
gen/ast_parser.py

@ -3,51 +3,97 @@ import clang.cindex
from clang.cindex import CursorKind
class AST_Parser:
def __init__(self,library_file=None):
class ASTParser:
def __init__(self, library_file):
if not clang.cindex.Config.loaded:
print("Using libclang from: %s", library_file)
clang.cindex.Config.set_library_file(library_file)
import sys
sys.setrecursionlimit(10000)
print("max recursion limit:", sys.getrecursionlimit())
def parse(self, filename, content):
def parse(self, filename, content=None, follow_includes=False):
index = clang.cindex.Index.create()
arguments = ["-x", "c"]
options = clang.cindex.TranslationUnit.PARSE_SKIP_FUNCTION_BODIES
if content:
content = [(filename, content)]
translation_unit = index.parse(filename, unsaved_files=content, args=arguments, options=options)
ret = self._parse(translation_unit.cursor, filename)
ret = self._parse(translation_unit.cursor, filename, follow_includes)
return ret
def _get_children_filelocal(self, cursor, path):
def _get_children(self, cursor, path, follow_includes=False):
if follow_includes:
return [c for c in cursor.get_children()]
else:
return [c for c in cursor.get_children() if c.location.file and c.location.file.name == path]
def _parse(self, cursor, path):
def _parse(self, cursor, path, follow_includes=False):
item = {}
# don't parse excluded CursorKinds
excluded_cursortypes = [CursorKind.INTEGER_LITERAL]
if not cursor.kind in excluded_cursortypes:
if not str(cursor.kind) == "":
# generic info for all CursorKinds
if str(cursor.get_tokens()):
str_tok = ""
for tok in cursor.get_tokens():
str_tok += tok.spelling + " "
item["tokens"] = str_tok.rstrip()
if str(cursor.kind):
item["kind"] = str(cursor.kind)
if not cursor.spelling == "":
if cursor.spelling:
item["name"] = cursor.spelling
if not cursor.displayname == "":
if cursor.displayname:
if cursor.displayname != item["name"]:
item["displayname"] = cursor.displayname
if not cursor.type.spelling == "":
if cursor.type.spelling:
item["type"] = cursor.type.spelling
if not cursor.result_type.spelling == "":
if cursor.result_type.spelling:
item["result_type"] = cursor.result_type.spelling
if cursor.semantic_parent:
if cursor.semantic_parent.kind.is_translation_unit():
item["semantic_parent"] = "global"
else:
item["semantic_parent"] = cursor.semantic_parent.spelling
item["is_definition"] = False
if cursor.is_definition():
item["is_definition"] = cursor.is_definition()
# TODO: does this ever occur?
if cursor.canonical:
if cursor.canonical.spelling != cursor.spelling:
item["canonical"] = cursor.canonical.spelling
# TYPE_REF specific info
# if cursor.kind == CursorKind.TYPE_REF:
# if cursor.get_definition():
# definition = cursor.get_definition()
# item["definition"] = self._parse(definition, path, follow_includes=follow_includes)
# ENUM specific info
if cursor.kind == CursorKind.ENUM_CONSTANT_DECL:
item["value"] = cursor.enum_value
child_cursors = self._get_children_filelocal(cursor, path)
# TYPEDEF specific info
if cursor.kind == CursorKind.TYPEDEF_DECL:
item["utype"] = cursor.underlying_typedef_type.spelling
# get direct children
child_cursors = self._get_children(cursor, path, follow_includes)
if len(child_cursors) > 0:
child_arr = []
for child_cursor in child_cursors:
child_result = self._parse(child_cursor, path)
child_result = self._parse(child_cursor, path, follow_includes=follow_includes)
if child_result:
child_arr.append(child_result)

255
gen/extract.py

@ -3,10 +3,11 @@
import os
import json
from ast_parser import AST_Parser
from ast_parser import ASTParser
from simple_ast import SimpleAST
def create_paths_list(dirname, filenames):
def join_dir_and_filenames(dirname, filenames):
paths = []
for basename in filenames:
path = dirname + basename
@ -14,170 +15,128 @@ def create_paths_list(dirname, filenames):
return paths
def read_files(paths):
content = []
for path in paths:
file_info = read_file(path)
content.append(file_info)
return content
def read_file(path):
with open(path) as f:
file_content = f.read()
item = {"path": path,
"sourcecode": file_content}
return item
return file_content
def write_json(header, key):
outpath = header["outpath"] + "." + key + ".json"
with open(outpath, "w+") as f:
json.dump(header[key], f, indent=4)
# out-dir is in-dir if not specified
def create_header(path, out_dir=None):
header = {"path": "",
"dir": "",
"filename": "",
"out_dir": "",
"sourcecode": ""}
header["path"] = path
header["dir"] = os.path.dirname(path)
header["filename"] = os.path.basename(path)
def prepare_header(header, out_dir):
basename = os.path.basename(header.get("path"))
outpath = out_dir + basename
header["outpath"] = outpath
header["out_dir"] = header["dir"]
if out_dir:
header["out_dir"] = out_dir
header["sourcecode"] = read_file(path)
return header
def create_simple_ast(ast):
elems = {"functions": "",
"structs": "",
"enums": ""}
elems["functions"] = extract_functions_from_ast(ast)
elems["structs"] = extract_structs_from_ast(ast)
elems["enums"] = extract_enums_from_ast(ast)
return elems
# extracts top level functions only (is there anything else in C?)
def extract_functions_from_ast(ast):
functions = []
for child in ast["children"]:
if child["kind"] == "CursorKind.FUNCTION_DECL":
functions.append(simple_ast_functions(child))
return functions
def simple_ast_functions(func_ast):
simple_func = {"name": "",
"return_type": "",
"arguments": []}
simple_func["name"] = func_ast["name"]
simple_func["return_type"] = func_ast["result_type"]
arguments = []
# check if func has args
if "children" in func_ast:
for arg_ast in func_ast["children"]:
arg_simple = None
if arg_ast["kind"] == "CursorKind.PARM_DECL":
arg_simple = {"name": "",
"type": ""}
arg_simple["name"] = arg_ast["name"]
arg_simple["type"] = arg_ast["type"]
if arg_simple:
arguments.append(arg_simple)
if arguments:
simple_func["arguments"] = arguments
return simple_func
# only typedef enums
def extract_enums_from_ast(ast):
enums = []
for typedef in ast["children"]:
if typedef["kind"] == "CursorKind.TYPEDEF_DECL":
typename = typedef["type"]
if "children" in typedef:
for enum in typedef["children"]:
if enum["kind"] == "CursorKind.ENUM_DECL":
enums.append(simple_ast_enums(enum, typename))
return enums
def simple_ast_enums(enum_ast, typename):
simple_enum = {"name": typename,
"items": []}
items = []
if "children" in enum_ast:
for item in enum_ast["children"]:
if item["kind"] == "CursorKind.ENUM_CONSTANT_DECL":
simple_item = {"name": "",
"value": ""}
simple_item["name"] = item["name"]
simple_item["value"] = item["value"]
items.append(simple_item)
if items:
simple_enum["items"] = items
return simple_enum
# only typedef structs
def extract_structs_from_ast(ast):
structs = []
for typedef in ast["children"]:
if typedef["kind"] == "CursorKind.TYPEDEF_DECL":
typename = typedef["type"]
if "children" in typedef:
for struct in typedef["children"]:
if struct["kind"] == "CursorKind.STRUCT_DECL":
structs.append(simple_ast_structs(struct, typename))
return structs
def simple_ast_structs(struct_ast, typename):
simple_struct = {"name" : typename,
"fields" : []}
fields = []
if "children" in struct_ast:
for field in struct_ast["children"]:
if field["kind"] == "CursorKind.FIELD_DECL":
simple_field = {"name": "",
"type": ""}
simple_field["name"] = field["name"]
simple_field["type"] = field["type"]
fields.append(simple_field)
if fields:
simple_struct["fields"] = fields
return simple_struct
def write_json(content, outpath):
    """Serialize ``content`` as indented JSON to ``outpath``.

    Any missing parent directories of ``outpath`` are created first. An
    existing file at ``outpath`` is overwritten.

    Args:
        content: Any object that ``json.dump`` can serialize.
        outpath: Destination file path; may be a bare filename.
    """
    out_dir = os.path.dirname(outpath)
    # dirname() is "" for a bare filename and os.makedirs("") raises, so only
    # create directories when the path actually has a directory component.
    if out_dir:
        # exist_ok avoids the check-then-create race of isdir() + makedirs().
        os.makedirs(out_dir, exist_ok=True)
    with open(outpath, "w+") as f:
        json.dump(content, f, indent=4)
def main():
input()
parser = AST_Parser("/opt/local/libexec/llvm-9.0/lib/libclang.dylib")
# Input
prefix = r"/Users/heck/local-default/"
# generates simple-ast for each header specified in spec out dir.
def main_old():
parser = ASTParser("/opt/local/libexec/llvm-9.0/lib/libclang.dylib")
in_dir = r"/Users/heck/local-default/include/pEp/"
filenames = ["pEpEngine.h",
"keymanagement.h"]
# Output
out_dir = "data/output/"
if not os.path.isdir(out_dir):
os.makedirs(out_dir)
"keymanagement.h",
"message_api.h",
"message.h",
"sync_api.h",
"key_reset.h",
"Identity.h",
"Rating.h"]
in_dir = prefix + r"include/pEp/"
paths = create_paths_list(in_dir, filenames)
headers = read_files(paths)
out_dir = "data/output"
paths = join_dir_and_filenames(in_dir, filenames)
headers = []
for path in paths:
headers.append(create_header(path, out_dir))
for header in headers:
header = prepare_header(header, out_dir)
print("processing path: " + header.get("path") + "...")
print("processing path: " + header["path"] + "...")
header["ast"] = parser.parse(header["path"], header["sourcecode"])
write_json(header, "ast")
write_json(header["ast"], header["out_dir"] + "/" + header["filename"] + ".ast.json")
simpleAst = SimpleAST()
header["simple_ast"] = simpleAst.create_simple_ast(header["ast"])
write_json(header["simple_ast"], header["out_dir"] + "/" + header["filename"] + ".simple_ast.json")
header["simple_ast"] = create_simple_ast(header["ast"])
write_json(header, "simple_ast")
def recursive_query(data, filter, transform=lambda x: x):
    """Collect transformed nodes of a nested dict tree that satisfy ``filter``.

    The tree is walked depth-first in pre-order: ``data`` itself is tested
    first, then each entry of ``data["children"]`` (if that key exists) is
    searched recursively.

    Args:
        data: A node (dict); child nodes live in an optional "children" list.
        filter: Predicate called with a node. A ``KeyError`` raised by the
            predicate is treated as "no match" for that node.
        transform: Called with each matching node; its result is kept only
            if it is truthy. Defaults to the identity function.

    Returns:
        A list of the truthy ``transform`` results, in visiting order.
    """
    # A predicate that touches a key the node lacks simply does not match.
    try:
        matched = filter(data)
    except KeyError:
        matched = None

    found = []
    if matched:
        mapped = transform(data)
        if mapped:
            found.append(mapped)

    # Descend into the children, keeping their results in order.
    if "children" in data:
        for child in data["children"]:
            found.extend(recursive_query(child, filter, transform))
    return found
def main_new():
    """Dump the AST of the test header and its non-definition struct nodes.

    Parses ``data/input/test_data/main_include.h`` with includes followed and
    writes the result to ``<out_dir>/<filename>.ast.json``. It then collects
    every ``CursorKind.STRUCT_DECL`` node whose ``is_definition`` equals
    ``False`` and writes those to ``<out_dir>/<filename>.matches.json``.
    """
    parser = ASTParser("/opt/local/libexec/llvm-9.0/lib/libclang.dylib")

    # Alternative input kept for reference:
    # create_header("/Users/heck/local-default/include/pEp/pEpEngine.h", out_dir="./")
    header = create_header("data/input/test_data/main_include.h")
    header["ast"] = parser.parse(header["path"], follow_includes=True)

    out_prefix = header["out_dir"] + "/" + header["filename"]
    write_json(header["ast"], out_prefix + ".ast.json")

    def is_undefined_struct(node):
        # A node lacking either key raises KeyError here; recursive_query
        # treats that as "no match".
        wanted = (
            node["is_definition"] == False  # noqa: E712 - equality kept on purpose
            and node["kind"] == "CursorKind.STRUCT_DECL"
        )
        return True if wanted else None

    matches = recursive_query(header["ast"], is_undefined_struct, lambda node: node)
    # Possible follow-up, currently disabled: matches = list(set(matches))
    write_json(matches, out_prefix + ".matches.json")
def main():
main_old()
# main_new()
if __name__ == "__main__":

144
gen/simple_ast.py

@ -0,0 +1,144 @@
# -*- coding: utf-8 -*-
class SimpleAST:
    """Condense a parsed AST (nested dicts) into flat, JSON-friendly summaries.

    Every node is a dict. The keys read here are "kind" (a string such as
    "CursorKind.FUNCTION_DECL"), "name", "type", "result_type", "value" and
    "children" (a list of nodes). Any of them may be absent: a missing
    "children" is treated as "no children" and a missing scalar falls back
    to "", so that one incomplete node (for example a parameter without a
    "name") does not abort the whole extraction with a KeyError.
    """

    def __init__(self):
        pass

    @staticmethod
    def _children(node):
        """Return the child nodes of ``node``, or an empty list if it has none."""
        return node.get("children") or []

    def _typedef_members(self, ast, kind):
        """Yield ``(typename, member)`` for each ``kind`` node directly inside a top-level typedef."""
        for typedef in self._children(ast):
            if typedef.get("kind") != "CursorKind.TYPEDEF_DECL":
                continue
            typename = typedef.get("type", "")
            for member in self._children(typedef):
                if member.get("kind") == kind:
                    yield typename, member

    def create_simple_ast(self, ast):
        """Build the complete summary of ``ast``.

        Returns:
            A dict with the keys "functions", "typedefs", "structs" and
            "enums", each holding the list produced by the matching
            ``extract_*_from_ast`` method.
        """
        return {
            "functions": self.extract_functions_from_ast(ast),
            "typedefs": self.extract_typedefs_from_ast(ast),
            "structs": self.extract_structs_from_ast(ast),
            "enums": self.extract_enums_from_ast(ast),
        }

    # extracts top level functions only
    def extract_functions_from_ast(self, ast):
        """Return a summary for every FUNCTION_DECL that is a direct child of ``ast``."""
        return [
            self._simple_ast_functions(child)
            for child in self._children(ast)
            if child.get("kind") == "CursorKind.FUNCTION_DECL"
        ]

    def _simple_ast_functions(self, func_ast):
        """Summarize one function node as ``{"name", "return_type", "arguments"}``.

        "arguments" lists ``{"name", "type"}`` for each PARM_DECL child, in
        order; it is an empty list when the function has none.
        """
        arguments = [
            {"name": arg.get("name", ""), "type": arg.get("type", "")}
            for arg in self._children(func_ast)
            if arg.get("kind") == "CursorKind.PARM_DECL"
        ]
        return {
            "name": func_ast.get("name", ""),
            "return_type": func_ast.get("result_type", ""),
            "arguments": arguments,
        }

    # only typedef enums
    def extract_enums_from_ast(self, ast):
        """Return a summary for every ENUM_DECL nested directly in a top-level typedef."""
        return [
            self._simple_ast_enums(enum, typename)
            for typename, enum in self._typedef_members(ast, "CursorKind.ENUM_DECL")
        ]

    def _simple_ast_enums(self, enum_ast, typename):
        """Summarize one enum node as ``{"name": typename, "items": [...]}``.

        Each item is ``{"name", "value"}`` taken from an ENUM_CONSTANT_DECL child.
        """
        items = [
            {"name": item.get("name", ""), "value": item.get("value", "")}
            for item in self._children(enum_ast)
            if item.get("kind") == "CursorKind.ENUM_CONSTANT_DECL"
        ]
        return {"name": typename, "items": items}

    # only typedef structs
    def extract_structs_from_ast(self, ast):
        """Return a summary for every STRUCT_DECL nested directly in a top-level typedef."""
        return [
            self._simple_ast_structs(struct, typename)
            for typename, struct in self._typedef_members(ast, "CursorKind.STRUCT_DECL")
        ]

    def _simple_ast_structs(self, struct_ast, typename):
        """Summarize one struct node as ``{"name": typename, "fields": [...]}``.

        Each field is ``{"name", "type"}`` taken from a FIELD_DECL child.
        """
        fields = [
            {"name": field.get("name", ""), "type": field.get("type", "")}
            for field in self._children(struct_ast)
            if field.get("kind") == "CursorKind.FIELD_DECL"
        ]
        return {"name": typename, "fields": fields}

    def extract_typedefs_from_ast(self, ast):
        """Return a summary for every top-level typedef whose first child is a TYPE_REF.

        Only the first child is inspected; typedefs without children, or
        whose first child is of another kind, are skipped.
        """
        typedefs = []
        for typedef in self._children(ast):
            if typedef.get("kind") != "CursorKind.TYPEDEF_DECL":
                continue
            children = self._children(typedef)
            if not children:
                continue
            ref = children[0]
            if ref.get("kind") == "CursorKind.TYPE_REF":
                typedefs.append(self._simple_ast_typedefs(ref, typedef.get("type", "")))
        return typedefs

    def _simple_ast_typedefs(self, typedef_ast, typename):
        """Summarize a typedef as ``{"name": typename, "kind", "value"}``.

        "kind" and "value" are the "kind" and "name" of ``typedef_ast``, the
        node the typedef refers to.
        """
        return {
            "name": typename,
            "kind": typedef_ast.get("kind", ""),
            "value": typedef_ast.get("name", ""),
        }
Loading…
Cancel
Save