
2 changed files with 430 additions and 143 deletions
@ -1,143 +0,0 @@ |
|||||
#!/usr/bin/env python3 |
|
||||
# -*- coding: utf-8 -*- |
|
||||
import os |
|
||||
import json |
|
||||
|
|
||||
from ast_parser import ASTParser |
|
||||
from simple_ast import SimpleAST |
|
||||
|
|
||||
|
|
||||
def join_dir_and_filenames(dirname, filenames):
    """Return the path of every name in *filenames* located inside *dirname*.

    The paths are built with ``os.path.join`` instead of plain string
    concatenation: concatenation silently produced wrong paths
    (``"/inc" + "a.h" == "/inca.h"``) whenever *dirname* lacked a trailing
    separator.  Directories that already end in a separator yield exactly
    the same result as before.

    :param dirname: directory the files live in
    :param filenames: iterable of file names relative to *dirname*
    :return: list of joined paths, in the order of *filenames*
    """
    return [os.path.join(dirname, basename) for basename in filenames]
|
||||
|
|
||||
|
|
||||
def read_file(path):
    """Return the complete text content of the file at *path*.

    The file is decoded as UTF-8 explicitly so the result no longer depends
    on the locale of the machine the script happens to run on.

    :param path: path of the file to read
    :return: file content as a single string
    :raises OSError: if the file cannot be opened
    :raises UnicodeDecodeError: if the file is not valid UTF-8
    """
    with open(path, encoding="utf-8") as f:
        return f.read()
|
||||
|
|
||||
|
|
||||
# out-dir is in-dir if not specified |
|
||||
def create_header(path, out_dir=None):
    """Build the record describing the header file at *path*.

    :param path: path of the header file; its content is read immediately
    :param out_dir: directory for generated output; when empty or ``None``
        the directory of *path* is used instead
    :return: dict with the keys ``path``, ``dir``, ``filename``,
        ``out_dir`` and ``sourcecode``
    """
    directory = os.path.dirname(path)
    return {
        "path": path,
        "dir": directory,
        "filename": os.path.basename(path),
        # out-dir is in-dir if not specified
        "out_dir": out_dir if out_dir else directory,
        "sourcecode": read_file(path),
    }
|
||||
|
|
||||
|
|
||||
def write_json(content, outpath):
    """Serialize *content* as indented JSON into the file *outpath*.

    Missing parent directories are created first.  A bare file name (no
    directory component) is written to the current directory; previously
    that case crashed because ``os.makedirs("")`` raises.

    :param content: JSON-serializable object
    :param outpath: path of the file to (over)write
    :raises OSError: if the directory or file cannot be created
    :raises TypeError: if *content* is not JSON-serializable
    """
    out_dir = os.path.dirname(outpath)
    if out_dir:
        # exist_ok avoids the check-then-create race of isdir() + makedirs()
        os.makedirs(out_dir, exist_ok=True)

    # write-only access is sufficient; the file is never read back here
    with open(outpath, "w") as f:
        json.dump(content, f, indent=4)
|
||||
|
|
||||
|
|
||||
# generates simple-ast for each header specified in spec out dir. |
|
||||
def main_old():
    """Parse a fixed list of headers and dump their AST and simple AST.

    For every header two JSON files are written to the output directory:
    ``<filename>.ast.json`` and ``<filename>.simple_ast.json``.
    """
    parser = ASTParser("/opt/local/libexec/llvm-9.0/lib/libclang.dylib")

    in_dir = r"/Users/heck/local-default/include/pEp/"
    out_dir = "data/output"
    filenames = [
        "pEpEngine.h",
        "keymanagement.h",
        "message_api.h",
        "message.h",
        "sync_api.h",
        "key_reset.h",
        "Identity.h",
        "Rating.h",
    ]

    # All headers are read up front, before any of them is parsed.
    headers = [create_header(path, out_dir)
               for path in join_dir_and_filenames(in_dir, filenames)]

    for header in headers:
        print("processing path: " + header["path"] + "...")
        out_base = header["out_dir"] + "/" + header["filename"]

        header["ast"] = parser.parse(header["path"], header["sourcecode"])
        write_json(header["ast"], out_base + ".ast.json")

        simple_ast = SimpleAST()
        header["simple_ast"] = simple_ast.create_simple_ast(header["ast"])
        write_json(header["simple_ast"], out_base + ".simple_ast.json")
|
||||
|
|
||||
|
|
||||
def recursive_query(data, filter, transform=lambda x: x):
    """Collect the transformed nodes of a tree that satisfy *filter*.

    A node is a dict; its sub-nodes are listed under the ``"children"`` key.
    Anything that is not a dict (numbers, strings, ``None`` ...) is skipped
    instead of being handed to *filter*, where subscripting it used to raise
    ``TypeError``.

    :param data: root node of the tree
    :param filter: callable(node) -> truthy when the node is wanted; a
        ``KeyError`` raised by it counts as "no match"
    :param transform: callable(node) -> value stored in the result; falsy
        values are dropped
    :return: list of transformed matches, parents before their children
    """
    resultset = []
    if not isinstance(data, dict):
        return resultset

    # nodes lacking a key the filter looks at simply do not match
    try:
        matched = filter(data)
    except KeyError:
        matched = False

    if matched:
        xformed = transform(data)
        if xformed:
            resultset.append(xformed)

    # recurse
    for child in data.get("children") or ():
        resultset += recursive_query(child, filter, transform)

    return resultset
|
||||
|
|
||||
|
|
||||
def main_new():
    """Parse one header (following includes) and dump AST and query result.

    Writes ``<filename>.ast.json`` with the full AST and
    ``<filename>.matches.json`` with all struct declarations that are not
    definitions, both next to the input header.
    """
    parser = ASTParser("/opt/local/libexec/llvm-9.0/lib/libclang.dylib")

    # header = create_header("/Users/heck/local-default/include/pEp/pEpEngine.h", out_dir="./")
    header = create_header("data/input/test_data/main_include.h")
    out_base = header["out_dir"] + "/" + header["filename"]

    header["ast"] = parser.parse(header["path"], follow_includes=True)
    write_json(header["ast"], out_base + ".ast.json")

    # query
    def is_forward_struct(node):
        # "== False" kept on purpose: a missing/None flag must not match
        return (node["is_definition"] == False  # noqa: E712
                and node["kind"] == "CursorKind.STRUCT_DECL")

    # the default transform of recursive_query is the identity
    matches = recursive_query(header["ast"], is_forward_struct)
    # matches = list(set(matches))
    write_json(matches, out_base + ".matches.json")
|
||||
|
|
||||
|
|
||||
def main():
    """Script entry point; currently runs the batch variant ``main_old``."""
    main_old()
    # main_new()


if __name__ == "__main__":
    main()
|
@ -0,0 +1,430 @@ |
|||||
|
#!/usr/bin/env python3 |
||||
|
# -*- coding: utf-8 -*- |
||||
|
import os |
||||
|
import json |
||||
|
|
||||
|
from ast_parser import ASTParser |
||||
|
from simple_ast import SimpleAST |
||||
|
|
||||
|
|
||||
|
def join_dir_and_filenames(dirname, filenames):
    """Return the path of every name in *filenames* located inside *dirname*.

    The paths are built with ``os.path.join`` instead of plain string
    concatenation: concatenation silently produced wrong paths
    (``"/inc" + "a.h" == "/inca.h"``) whenever *dirname* lacked a trailing
    separator.  Directories that already end in a separator yield exactly
    the same result as before.

    :param dirname: directory the files live in
    :param filenames: iterable of file names relative to *dirname*
    :return: list of joined paths, in the order of *filenames*
    """
    return [os.path.join(dirname, basename) for basename in filenames]
||||
|
|
||||
|
|
||||
|
def read_file(path):
    """Return the complete text content of the file at *path*.

    The file is decoded as UTF-8 explicitly so the result no longer depends
    on the locale of the machine the script happens to run on.

    :param path: path of the file to read
    :return: file content as a single string
    :raises OSError: if the file cannot be opened
    :raises UnicodeDecodeError: if the file is not valid UTF-8
    """
    with open(path, encoding="utf-8") as f:
        return f.read()
||||
|
|
||||
|
|
||||
|
# out-dir is in-dir if not specified |
||||
|
def create_header(path, out_dir=None):
    """Build the record describing the header file at *path*.

    :param path: path of the header file; its content is read immediately
    :param out_dir: directory for generated output; when empty or ``None``
        the directory of *path* is used instead
    :return: dict with the keys ``path``, ``dir``, ``filename``,
        ``out_dir`` and ``sourcecode``
    """
    directory = os.path.dirname(path)
    return {
        "path": path,
        "dir": directory,
        "filename": os.path.basename(path),
        # out-dir is in-dir if not specified
        "out_dir": out_dir if out_dir else directory,
        "sourcecode": read_file(path),
    }
||||
|
|
||||
|
|
||||
|
def write_json(content, outpath):
    """Serialize *content* as indented JSON into the file *outpath*.

    Missing parent directories are created first.  A bare file name (no
    directory component) is written to the current directory; previously
    that case crashed because ``os.makedirs("")`` raises.

    :param content: JSON-serializable object
    :param outpath: path of the file to (over)write
    :raises OSError: if the directory or file cannot be created
    :raises TypeError: if *content* is not JSON-serializable
    """
    out_dir = os.path.dirname(outpath)
    if out_dir:
        # exist_ok avoids the check-then-create race of isdir() + makedirs()
        os.makedirs(out_dir, exist_ok=True)

    # write-only access is sufficient; the file is never read back here
    with open(outpath, "w") as f:
        json.dump(content, f, indent=4)
||||
|
|
||||
|
|
||||
|
# generates simple-ast for each header specified in spec out dir. |
||||
|
def main_old():
    """Parse a fixed list of headers and dump their AST and simple AST.

    For every header two JSON files are written to the output directory:
    ``<filename>.ast.json`` and ``<filename>.simple_ast.json``.
    """
    parser = ASTParser("/opt/local/libexec/llvm-9.0/lib/libclang.dylib")

    in_dir = r"/Users/heck/local-default/include/pEp/"
    out_dir = "data/output"
    filenames = [
        "pEpEngine.h",
        "keymanagement.h",
        "message_api.h",
        "message.h",
        "sync_api.h",
        "key_reset.h",
        "Identity.h",
        "Rating.h",
    ]

    # All headers are read up front, before any of them is parsed.
    headers = [create_header(path, out_dir)
               for path in join_dir_and_filenames(in_dir, filenames)]

    for header in headers:
        print("processing path: " + header["path"] + "...")
        out_base = header["out_dir"] + "/" + header["filename"]

        header["ast"] = parser.parse(header["path"], header["sourcecode"])
        write_json(header["ast"], out_base + ".ast.json")

        simple_ast = SimpleAST()
        header["simple_ast"] = simple_ast.create_simple_ast(header["ast"])
        write_json(header["simple_ast"], out_base + ".simple_ast.json")
||||
|
|
||||
|
|
||||
|
# works on valid json like structure lists/dict |
||||
|
def recursive_query(data, filter, transform=lambda x: x):
    """Collect the transformed dicts of a JSON-like structure matching *filter*.

    The structure is walked depth-first: dicts are tested with *filter* and
    then searched through all of their values, lists are searched through
    all of their items, every other value is ignored.

    Dicts are recognised with ``isinstance`` for both filtering and
    recursion; the former ``type(data) in [dict]`` test meant dict
    subclasses were traversed but could never match themselves.

    :param data: any value; only dicts and lists are inspected
    :param filter: callable(dict) -> truthy when the dict is wanted; a
        ``KeyError`` raised by it counts as "no match"
    :param transform: callable(dict) -> value stored in the result; falsy
        values are dropped
    :return: list of transformed matches, a dict before its descendants
    """
    resultset = []

    if isinstance(data, dict):
        # dicts lacking a key the filter looks at simply do not match
        try:
            matched = filter(data)
        except KeyError:
            matched = False

        if matched:
            xformed = transform(data)
            if xformed:
                resultset.append(xformed)

        children = data.values()
    elif isinstance(data, list):
        children = data
    else:
        # scalars cannot contain further matches
        return resultset

    for child in children:
        resultset += recursive_query(child, filter, transform)

    return resultset
||||
|
|
||||
|
|
||||
|
def extract_forward_declarations(data):
    """Return every struct declaration in *data* that is not a definition.

    :param data: JSON-like structure searched with ``recursive_query``
    :return: list of the matching dicts, unchanged
    """
    def is_forward_struct(node):
        # "== False" kept on purpose: a missing/None flag must not match
        return (node["is_definition"] == False  # noqa: E712
                and node["kind"] == "CursorKind.STRUCT_DECL")

    # the default transform of recursive_query is the identity
    return recursive_query(data, is_forward_struct)
||||
|
|
||||
|
|
||||
|
def extract_functions(data, function_names):
    """Return the function declarations in *data* named in *function_names*.

    :param data: JSON-like structure searched with ``recursive_query``
    :param function_names: container of the wanted function names
    :return: list of the matching dicts, unchanged
    """
    def is_wanted_function(node):
        return (node["kind"] == "CursorKind.FUNCTION_DECL"
                and node["name"] in function_names)

    # the default transform of recursive_query is the identity
    return recursive_query(data, is_wanted_function)
||||
|
|
||||
|
|
||||
|
# just the typenames of all typerefs no dups |
||||
|
def collect_typerefs(data):
    """Return the type names of all type references in *data*, no duplicates.

    Duplicates are removed while keeping the order of first occurrence
    (``dict.fromkeys`` on an insertion-ordered dict).  The former
    ``list(set(...))`` returned the names in an order that changed from run
    to run because of string hash randomization.

    :param data: JSON-like structure searched with ``recursive_query``
    :return: list of unique ``"type"`` values of all ``TYPE_REF`` dicts
    """
    def is_typeref(node):
        return node["kind"] == "CursorKind.TYPE_REF"

    def type_name(node):
        return node["type"]

    results = recursive_query(data, is_typeref, type_name)
    return list(dict.fromkeys(results))
||||
|
|
||||
|
|
||||
|
def extract_vars(data, var_names):
    """Return the variable declarations in *data* named in *var_names*.

    :param data: JSON-like structure searched with ``recursive_query``
    :param var_names: container of the wanted variable names
    :return: list of the matching dicts, unchanged
    """
    def is_wanted_var(node):
        return (node["kind"] == "CursorKind.VAR_DECL"
                and node["name"] in var_names)

    # the default transform of recursive_query is the identity
    return recursive_query(data, is_wanted_var)
||||
|
|
||||
|
|
||||
|
def extract_types(data):
    """Return the names of all typedef, enum and struct declarations in *data*.

    Declarations whose name is empty are left out, because
    ``recursive_query`` drops falsy transform results.

    :param data: JSON-like structure searched with ``recursive_query``
    :return: list of ``"name"`` values, duplicates included
    """
    type_kinds = (
        "CursorKind.TYPEDEF_DECL",
        "CursorKind.ENUM_DECL",
        "CursorKind.STRUCT_DECL",
    )

    def is_type_decl(node):
        return node["kind"] in type_kinds

    def name_of(node):
        return node["name"]

    return recursive_query(data, is_type_decl, name_of)
||||
|
|
||||
|
|
||||
|
def remove_dup_dicts(arr_of_dicts):
    """Return copies of the dicts in *arr_of_dicts* with duplicates removed.

    Compared with the former set-of-item-tuples approach this version

    * keeps the order of first occurrence instead of an arbitrary set order,
    * treats dicts as equal regardless of their key order,
    * also works when values are unhashable (lists, nested dicts).

    :param arr_of_dicts: iterable of dicts
    :return: new list of shallow copies, one per distinct dict
    """
    seen_keys = set()      # fingerprints of dicts whose items are hashable
    seen_unhashable = []   # dicts that can only be compared by equality
    unique = []

    for d in arr_of_dicts:
        try:
            key = frozenset(d.items())
        except TypeError:
            # unhashable value: fall back to a linear equality scan
            if d in seen_unhashable:
                continue
            seen_unhashable.append(d)
        else:
            if key in seen_keys:
                continue
            seen_keys.add(key)
        unique.append(dict(d))

    return unique
||||
|
|
||||
|
|
||||
|
def resolve_typerefs(ast, typeref_names):
    """Resolve every name in *typeref_names* against *ast*.

    :param ast: structure searched by ``resolve_typeref``
    :param typeref_names: iterable of type names to look up
    :return: tuple ``(types, resolved_names, notfound_names)`` where
        ``types[i]`` is the declaration found for ``resolved_names[i]``
    """
    resolved_types, found_names, missing_names = [], [], []

    for name in typeref_names:
        declaration = resolve_typeref(ast, name)
        if declaration:
            resolved_types.append(declaration)
            found_names.append(name)
        else:
            missing_names.append(name)

    return (resolved_types, found_names, missing_names)
||||
|
|
||||
|
|
||||
|
def resolve_typeref(ast, typeref_name):
    """Return the declaration *typeref_name* refers to, or a falsy value.

    The lookups are tried in this order and the first truthy result wins:
    enum declaration, struct declaration, typedef declaration.

    :param ast: structure to search
    :param typeref_name: type name to look up
    """
    return (extract_enum_decl(ast, typeref_name)
            or extract_struct_decl(ast, typeref_name)
            or extract_typedef_decl(ast, typeref_name))
||||
|
|
||||
|
|
||||
|
def extract_enum_decl(ast, name):
    """Return the single enum declaration of type *name* in *ast*.

    :param ast: JSON-like structure searched with ``recursive_query``
    :param name: value the declaration's ``"type"`` entry must equal
    :return: the matching dict, or ``None`` when there is no match or the
        match is ambiguous (more than one declaration)

    The original guarded the ambiguous case with
    ``assert (False, "duplicate definition")``.  That asserts a non-empty
    tuple, which is always true, so duplicates were dropped without any
    notice.  The ``None`` result is kept so existing runs do not start
    failing, but the condition is now reported through ``warnings``.
    """
    import warnings

    def is_named_enum(node):
        return (node["kind"] == "CursorKind.ENUM_DECL"
                and node["type"] == name)

    matches = recursive_query(ast, is_named_enum)

    if len(matches) > 1:
        warnings.warn(
            "duplicate definition: %d enum declarations of type %r"
            % (len(matches), name),
            stacklevel=2,
        )
        return None

    return matches[0] if matches else None
||||
|
|
||||
|
|
||||
|
def extract_struct_decl(ast, name):
    """Return the single struct declaration of type *name* in *ast*.

    :param ast: JSON-like structure searched with ``recursive_query``
    :param name: value the declaration's ``"type"`` entry must equal
    :return: the matching dict, or ``None`` when there is no match or the
        match is ambiguous (more than one declaration)

    The original guarded the ambiguous case with
    ``assert (False, "duplicate definition")``.  That asserts a non-empty
    tuple, which is always true, so duplicates were dropped without any
    notice.  The ``None`` result is kept so existing runs do not start
    failing, but the condition is now reported through ``warnings``.
    """
    import warnings

    def is_named_struct(node):
        return (node["kind"] == "CursorKind.STRUCT_DECL"
                and node["type"] == name)

    matches = recursive_query(ast, is_named_struct)

    if len(matches) > 1:
        warnings.warn(
            "duplicate definition: %d struct declarations of type %r"
            % (len(matches), name),
            stacklevel=2,
        )
        return None

    return matches[0] if matches else None
||||
|
|
||||
|
|
||||
|
def extract_typedef_decl(ast, name):
    """Resolve the typedef of type *name* in *ast* to what it stands for.

    The last matching typedef declaration is taken and then, depending on
    its ``"utypekind"``:

    * ``"Typedef"``: the typedef named by ``"utype"`` is resolved
      recursively,
    * ``"Elaborated"``: the enum or struct declaration named by ``"utype"``
      is returned (may be falsy when neither is found),
    * anything else: the typedef declaration itself is returned.

    :param ast: JSON-like structure searched with ``recursive_query``
    :param name: value the typedef's ``"type"`` entry must equal
    :return: the resolved declaration dict, or ``None`` when no typedef of
        that type exists
    """
    def is_named_typedef(node):
        return (node["kind"] == "CursorKind.TYPEDEF_DECL"
                and node["type"] == name)

    matches = recursive_query(ast, is_named_typedef)
    if not matches:
        # explicit guard: there is nothing to inspect or follow
        return None

    typedef = matches[-1]
    underlying_kind = typedef["utypekind"]

    if underlying_kind == "Typedef":
        return extract_typedef_decl(ast, typedef["utype"])
    if underlying_kind == "Elaborated":
        return (extract_enum_decl(ast, typedef["utype"])
                or extract_struct_decl(ast, typedef["utype"]))
    return typedef
||||
|
|
||||
|
|
||||
|
# def remove_already_resolved(resolved, typerefs): |
||||
|
# unresolved = [] |
||||
|
# for typeref in typerefs: |
||||
|
# def filter(data): |
||||
|
# if (data["kind"] == "CursorKind.STRUCT_DECL" |
||||
|
# or data["kind"] == "CursorKind.ENUM_DECL" #maybe typedef |
||||
|
# and data["type"] == typeref["type"]): |
||||
|
# return True |
||||
|
# |
||||
|
# contained = recursive_query(resolved, filter) |
||||
|
# if not contained: |
||||
|
# unresolved.append(typeref) |
||||
|
# return unresolved |
||||
|
|
||||
|
def find_dup_types(data):
    """List the struct/enum type names in *data* and those occurring twice.

    Occurrences are counted once with ``collections.Counter`` instead of
    calling ``list.count`` per element (quadratic), and the duplicates are
    reported in order of first occurrence instead of arbitrary set order.

    :param data: JSON-like structure searched with ``recursive_query``
    :return: tuple ``(all_types, dups)``: every ``"type"`` value of a struct
        or enum declaration (duplicates included), and the list of values
        that occur more than once
    """
    from collections import Counter

    def is_struct_or_enum(node):
        return node["kind"] in ("CursorKind.STRUCT_DECL",
                                "CursorKind.ENUM_DECL")

    def type_name(node):
        return node["type"]

    all_types = recursive_query(data, is_struct_or_enum, type_name)
    counts = Counter(all_types)
    dups = [name for name, count in counts.items() if count > 1]

    return (all_types, dups)
||||
|
|
||||
|
|
||||
|
def main_new():
    """Parse the test header and write its AST, CDL and views as JSON.

    Three files are written next to the input header:
    ``<filename>.ast.json``, ``<filename>.cdl.json`` and
    ``<filename>.views.json``.
    """
    ast_parser = ASTParser("/opt/local/libexec/llvm-9.0/lib/libclang.dylib")

    # Names to extract. Both sets always contain the empty string.
    # Currently disabled function names:
    #   func_void_void, func_void_int, func_int_void, func_int_int,
    #   func__PS_V, func__E_V, func_TP_V, func_V_TP, func_TP_TP,
    #   func_APS_V, func_V_APS, func_APS_APS, func_TPS_V, func_V_TPS,
    #   func_TPS_TPS
    function_names = {""}
    # Currently disabled variable names:
    #   var_P, var__E, var_E, var_AE, var__PS, var_PS, var_APS, var__CS,
    #   var_CS, var_ACS, var__CCS, var_CCS, var_ACCS, var__NCS, var_HS,
    #   var__NSP, var_AHS, var__HHS, var__NHS, var__NNPS, var_HHS,
    #   var__NHS1, var__NNCS, var__NENHS, var_ASHS, var__HS1, var_NEHS1
    var_names = {"", "var__HS"}

    # header = create_header("/Users/heck/local-default/include/pEp/pEpEngine.h", out_dir="./")
    header = create_header("data/input/test_data/test_lib.h")
    out_base = header["out_dir"] + "/" + header["filename"]

    ast = ast_parser.parse(header["path"], follow_includes=True)
    header["ast"] = ast
    write_json(ast, out_base + ".ast.json")

    # VIEWS
    views = {
        "forward_declarations": extract_forward_declarations(ast),
        "types": extract_types(ast),
    }
    views["types_count"] = len(views["types"])

    # CDL
    # AST normalizing

    # stage 1: extract functions and vars
    cdl = {
        "functions": extract_functions(ast, function_names),
        "vars": extract_vars(ast, var_names),
        "structs": "",
        "enums": "",
        # stage 2: collect type refs
        "types_resolved": [],
        "typerefs_unresolved": [],
        "typerefs_resolved": [],
        "typerefs_notfound": [],
    }

    # Resolved types may reference further types, so the whole cdl is
    # rescanned until no type reference is left that was not handled yet.
    while True:
        handled = set(cdl["typerefs_resolved"]).union(cdl["typerefs_notfound"])
        # collect_typerefs yields only a list of type names
        cdl["typerefs_unresolved"] = list(set(collect_typerefs(cdl)) - handled)
        if not cdl["typerefs_unresolved"]:
            break

        found_types, found_names, missing_names = resolve_typerefs(
            ast, cdl["typerefs_unresolved"])
        cdl["types_resolved"].extend(found_types)
        cdl["typerefs_resolved"].extend(found_names)
        cdl["typerefs_notfound"].extend(missing_names)

    views["all_types"], views["dup_types"] = find_dup_types(cdl)

    header["cdl"] = cdl
    write_json(cdl, out_base + ".cdl.json")

    header["views"] = views
    write_json(views, out_base + ".views.json")
||||
|
|
||||
|
|
||||
|
def main():
    """Script entry point; currently runs the CDL variant ``main_new``."""
    # main_old()
    main_new()


if __name__ == "__main__":
    main()
Loading…
Reference in new issue