#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import json

from ast_parser import ASTParser
from simple_ast import SimpleAST


def join_dir_and_filenames(dirname, filenames):
    paths = []
    for basename in filenames:
        # os.path.join also copes with a missing trailing separator on dirname
        path = os.path.join(dirname, basename)
        paths.append(path)
    return paths


def read_file(path):
    with open(path) as f:
        file_content = f.read()
    return file_content


# out_dir defaults to the input file's directory if not specified
def create_header(path, out_dir=None):
    header = {"path": "",
              "dir": "",
              "filename": "",
              "out_dir": "",
              "sourcecode": ""}

    header["path"] = path
    header["dir"] = os.path.dirname(path)
    header["filename"] = os.path.basename(path)

    header["out_dir"] = header["dir"]
    if out_dir:
        header["out_dir"] = out_dir

    header["sourcecode"] = read_file(path)
    return header

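# Usage sketch for create_header (mirrors the call in main_new below; the
# out_dir value here is only an illustration):
#   h = create_header("data/input/test_data/test_lib.h", out_dir="data/output")
# yields roughly
#   {"path": "data/input/test_data/test_lib.h",
#    "dir": "data/input/test_data",
#    "filename": "test_lib.h",
#    "out_dir": "data/output",
#    "sourcecode": "<file contents>"}
# with "out_dir" falling back to "dir" when no out_dir is passed.

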
def write_json(content, outpath):
    # create the output directory if it does not exist yet
    out_dir = os.path.dirname(outpath)
    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    # write
    with open(outpath, "w") as f:
        json.dump(content, f, indent=4)


# works on valid JSON-like structures (nested lists/dicts)
def recursive_query(data, filter, transform=lambda x: x):
    # all datatypes accepted
    # print(type(data))
    # assert (type(data) in [list, dict, int, str, bool])
    resultset = []

    # wrapper around filter() that swallows KeyError for nodes
    # that do not carry the expected keys
    def filter_decorator(data):
        try:
            return filter(data)
        except KeyError:
            pass

    # filter the current node
    # only dict nodes are filtered
    if isinstance(data, dict):
        if filter_decorator(data):
            # transform the match before collecting it
            xformed = transform(data)
            if xformed:
                resultset.append(xformed)

    # recurse into children
    if isinstance(data, dict):
        for item in data.values():
            childres = recursive_query(item, filter, transform)
            if childres:
                resultset += childres
    elif isinstance(data, list):
        for item in data:
            childres = recursive_query(item, filter, transform)
            if childres:
                resultset += childres

    return resultset

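# Usage sketch for recursive_query (the data below is made up for illustration,
# it is not real ASTParser output): collect the names of all FUNCTION_DECL
# nodes anywhere in a nested dict/list structure.
#
#   data = {"kind": "CursorKind.TRANSLATION_UNIT",
#           "children": [{"kind": "CursorKind.FUNCTION_DECL", "name": "f"},
#                        {"kind": "CursorKind.VAR_DECL", "name": "v"}]}
#   names = recursive_query(data,
#                           filter=lambda n: n["kind"] == "CursorKind.FUNCTION_DECL",
#                           transform=lambda n: n["name"])
#   # names == ["f"]
#
# Nodes that raise KeyError inside the filter are skipped, and falsy
# transform results are dropped from the result set.

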
def extract_forward_declarations(data):
    def filter(item):
        if (item["is_definition"] == False
            and item["kind"] == "CursorKind.STRUCT_DECL"
        ):
            return True

    def xform(item):
        return item

    return recursive_query(data, filter, xform)


def extract_functions(data, function_names):
    def filter(item):
        if (item["kind"] == "CursorKind.FUNCTION_DECL"
            and item["name"] in function_names
        ):
            return True

    def xform(item):
        return item

    return recursive_query(data, filter, xform)


# just the type names of all typerefs, without duplicates
def collect_typerefs(data):
    def filter(item):
        if item["kind"] == "CursorKind.TYPE_REF":
            return True

    def xform(item):
        return item["type"]

    results = recursive_query(data, filter, xform)
    no_dups = list(set(results))
    return no_dups


def extract_vars(data, var_names):
    def filter(item):
        if (item["kind"] == "CursorKind.VAR_DECL"
            and item["name"] in var_names
        ):
            return True

    def xform(item):
        return item

    return recursive_query(data, filter, xform)


def extract_types(data):
    def filter(item):
        if (item["kind"] == "CursorKind.TYPEDEF_DECL"
            or item["kind"] == "CursorKind.ENUM_DECL"
            or item["kind"] == "CursorKind.STRUCT_DECL"
        ):
            return True

    def xform(item):
        ret = item["name"]
        return ret

    return recursive_query(data, filter, xform)


def remove_dup_dicts(arr_of_dicts):
    arr_no_dups = [dict(i) for i in {tuple(d.items()) for d in arr_of_dicts}]
    return arr_no_dups

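# Usage sketch for remove_dup_dicts (input made up for illustration):
#   remove_dup_dicts([{"a": 1}, {"a": 1}, {"b": 2}])
#   # -> [{"a": 1}, {"b": 2}]   (order is not preserved)
# Note: this only works for dicts whose values are hashable; a nested
# list/dict value raises TypeError when the tuple of items goes into the set.

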
def resolve_typerefs(ast, typeref_names):
    types = []
    typeref_resolved = []
    typeref_notfound = []

    for typeref_name in typeref_names:
        res = resolve_typeref(ast, typeref_name)
        if not res:
            typeref_notfound.append(typeref_name)
        else:
            types.append(res)
            typeref_resolved.append(typeref_name)

    return (types, typeref_resolved, typeref_notfound)


def resolve_typeref(ast, typeref_name):
    ret = None

    ret = extract_enum_decl(ast, typeref_name)
    if not ret:
        ret = extract_struct_decl(ast, typeref_name)
    if not ret:
        ret = extract_typedef_decl(ast, typeref_name)

    return ret

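# Usage sketch for resolve_typeref (the type name below is hypothetical, for
# illustration only): a collected typeref name is looked up first as an enum
# definition, then as a struct definition, then as a typedef, and the first
# match wins; the typedef branch follows typedef chains down to the underlying
# type (see extract_typedef_decl below).
#   node = resolve_typeref(header["ast"], "some_type_t")
#   # node is the matching declaration dict, or None if nothing matched,
#   # in which case the caller records the name as not found.

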
def extract_enum_decl(ast, name):
    ret = None

    def filter(data):
        if (data["kind"] == "CursorKind.ENUM_DECL"
            and data["type"] == name
        ):
            return True

    res = recursive_query(ast, filter)
    if len(res) > 1:
        assert False, "duplicate definition"
    if len(res) == 1:
        ret = res[0]

    return ret


def extract_struct_decl(ast, name):
    ret = None

    def filter(data):
        if (data["kind"] == "CursorKind.STRUCT_DECL"
            and data["type"] == name
        ):
            return True

    res = recursive_query(ast, filter)
    if len(res) > 1:
        assert False, "duplicate definition"
    if len(res) == 1:
        ret = res[0]

    return ret


def extract_typedef_decl(ast, name):
    ret = None

    def filter(data):
        if (data["kind"] == "CursorKind.TYPEDEF_DECL"
            and data["type"] == name
        ):
            return True

    res = recursive_query(ast, filter)
    if res:
        ret = res.pop()

        # follow the typedef chain down to the underlying type
        if ret["utypekind"] == "Typedef":
            ret = extract_typedef_decl(ast, ret["utype"])
        elif ret["utypekind"] == "Elaborated":
            ret = extract_enum_decl(ast, ret["utype"]) or extract_struct_decl(ast, ret["utype"])

    return ret

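# Illustration of the chain following above (the node shapes are made up, not
# real ASTParser output): for C code like
#   typedef struct _blob blob;      ->  TYPEDEF_DECL with utypekind "Elaborated"
#   typedef blob blob_alias;        ->  TYPEDEF_DECL with utypekind "Typedef"
# extract_typedef_decl(ast, "blob_alias") first finds the matching
# TYPEDEF_DECL node, then recurses on its "utype" until it reaches the
# underlying STRUCT_DECL / ENUM_DECL, or returns None if the chain
# cannot be followed.

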
def find_dup_types(data):
    def filter(item):
        if (item["kind"] == "CursorKind.STRUCT_DECL"
            or item["kind"] == "CursorKind.ENUM_DECL"):
            return True

    def xform(item):
        return item["type"]

    all_types = recursive_query(data, filter, xform)
    dups = set()
    for type_name in all_types:
        if all_types.count(type_name) > 1:
            dups.add(type_name)

    return (all_types, list(dups))

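# Usage sketch for find_dup_types (input made up for illustration): it returns
# every struct/enum type name it finds plus the subset that occurs more than once.
#   find_dup_types([{"kind": "CursorKind.STRUCT_DECL", "type": "S"},
#                   {"kind": "CursorKind.STRUCT_DECL", "type": "S"},
#                   {"kind": "CursorKind.ENUM_DECL", "type": "E"}])
#   # -> (["S", "S", "E"], ["S"])

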
def main_new():
    ast_parser = ASTParser("/opt/local/libexec/llvm-9.0/lib/libclang.dylib")

    function_names = {""}
    # function_names.add("func_void_void")
    # function_names.add("func_void_int")
    # function_names.add("func_int_void")
    # function_names.add("func_int_int")
    # function_names.add("func__PS_V")
    # function_names.add("func__E_V")
    # function_names.add("func_TP_V")
    # function_names.add("func_V_TP")
    # function_names.add("func_TP_TP")
    # function_names.add("func_APS_V")
    # function_names.add("func_V_APS")
    # function_names.add("func_APS_APS")
    # function_names.add("func_TPS_V")
    # function_names.add("func_V_TPS")
    # function_names.add("func_V_TPS")
    # function_names.add("func_TPS_TPS")
    # function_names.add("func_TPS_TPS")

    var_names = {""}
    # var_names.add("var_P")
    # var_names.add("var__E")
    # var_names.add("var_E")
    # var_names.add("var_AE")
    # var_names.add("var__PS")
    # var_names.add("var_PS")
    # var_names.add("var_APS")
    # var_names.add("var__CS")
    # var_names.add("var_CS")
    # var_names.add("var_ACS")
    # var_names.add("var__CCS")
    # var_names.add("var_CCS")
    # var_names.add("var_ACCS")
    var_names.add("var__HS")
    # var_names.add("var__NCS")
    # var_names.add("var_HS")
    # var_names.add("var__NSP")
    # var_names.add("var_AHS")
    # var_names.add("var__HHS")
    # var_names.add("var__NHS")
    # var_names.add("var__NNPS")
    # var_names.add("var_HHS")
    # var_names.add("var__NHS1")
    # var_names.add("var__NNCS")
    # var_names.add("var__NENHS")
    # var_names.add("var_ASHS")
    # var_names.add("var__HS1")
    # var_names.add("var_NEHS1")

    # header = create_header("/Users/heck/local-default/include/pEp/pEpEngine.h", out_dir="./")
    header = create_header("data/input/test_data/test_lib.h")

    header["ast"] = ast_parser.parse(header["path"], follow_includes=True)
    write_json(header["ast"], header["out_dir"] + "/" + header["filename"] + ".ast.json")

    # VIEWS
    views = {}
    views["forward_declarations"] = extract_forward_declarations(header["ast"])
    views["types"] = extract_types(header["ast"])
    views["types_count"] = len(views["types"])

    # CDL
    # AST normalizing

    cdl = {"functions": "",
           "vars": "",
           "structs": "",
           "enums": ""}

    # stage 1: extract functions and vars
    cdl["functions"] = extract_functions(header["ast"], function_names)
    cdl["vars"] = extract_vars(header["ast"], var_names)

    # stage 2: collect type refs and resolve them until no new ones turn up
    cdl["types_resolved"] = []
    cdl["typerefs_unresolved"] = []
    cdl["typerefs_resolved"] = []
    cdl["typerefs_notfound"] = []

    while True:
        cdl["typerefs_unresolved"] = collect_typerefs(cdl)  # only a list of type names
        cdl["typerefs_unresolved"] = list(set(cdl["typerefs_unresolved"]) - (set(cdl["typerefs_resolved"]).union(set(cdl["typerefs_notfound"]))))
        if len(cdl["typerefs_unresolved"]) <= 0:
            break

        (types_resolved, typerefs_resolved, notfound) = resolve_typerefs(header["ast"], cdl["typerefs_unresolved"])
        cdl["types_resolved"] += types_resolved
        cdl["typerefs_resolved"] += typerefs_resolved
        cdl["typerefs_notfound"] += notfound

    (views["all_types"], views["dup_types"]) = find_dup_types(cdl)
    (views["ast_all_types"], views["ast_dup_types"]) = find_dup_types(header["ast"])

    header["cdl"] = cdl
    write_json(header["cdl"], header["out_dir"] + "/" + header["filename"] + ".cdl.json")

    header["views"] = views
    write_json(header["views"], header["out_dir"] + "/" + header["filename"] + ".views.json")


# generates a simple AST for each header listed below and writes it to the out dir
def main_old():
    parser = ASTParser("/opt/local/libexec/llvm-9.0/lib/libclang.dylib")

    in_dir = r"/Users/heck/local-default/include/pEp/"
    filenames = ["pEpEngine.h",
                 "keymanagement.h",
                 "message_api.h",
                 "message.h",
                 "sync_api.h",
                 "key_reset.h",
                 "Identity.h",
                 "Rating.h"]

    out_dir = "data/output"

    paths = join_dir_and_filenames(in_dir, filenames)

    headers = []
    for path in paths:
        headers.append(create_header(path, out_dir))

    for header in headers:
        print("processing path: " + header["path"] + "...")
        header["ast"] = parser.parse(header["path"], header["sourcecode"])
        write_json(header["ast"], header["out_dir"] + "/" + header["filename"] + ".ast.json")

        simpleAst = SimpleAST()
        header["simple_ast"] = simpleAst.create_simple_ast(header["ast"])
        write_json(header["simple_ast"], header["out_dir"] + "/" + header["filename"] + ".simple_ast.json")


def main():
    # main_old()
    main_new()


if __name__ == "__main__":
    main()