
2 changed files with 430 additions and 143 deletions
@@ -1,143 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import json

from ast_parser import ASTParser
from simple_ast import SimpleAST


def join_dir_and_filenames(dirname, filenames):
    paths = []
    for basename in filenames:
        path = dirname + basename
        paths.append(path)
    return paths


def read_file(path):
    with open(path) as f:
        file_content = f.read()
    return file_content


# out-dir is in-dir if not specified
def create_header(path, out_dir=None):
    header = {"path": "",
              "dir": "",
              "filename": "",
              "out_dir": "",
              "sourcecode": ""}

    header["path"] = path
    header["dir"] = os.path.dirname(path)
    header["filename"] = os.path.basename(path)

    header["out_dir"] = header["dir"]
    if out_dir:
        header["out_dir"] = out_dir

    header["sourcecode"] = read_file(path)
    return header


def write_json(content, outpath):
    # create path if not existing
    out_dir = os.path.dirname(outpath)
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    # write
    with open(outpath, "w+") as f:
        json.dump(content, f, indent=4)


# generates simple-ast for each header specified, in spec'd out dir.
def main_old():
    parser = ASTParser("/opt/local/libexec/llvm-9.0/lib/libclang.dylib")

    in_dir = r"/Users/heck/local-default/include/pEp/"
    filenames = ["pEpEngine.h",
                 "keymanagement.h",
                 "message_api.h",
                 "message.h",
                 "sync_api.h",
                 "key_reset.h",
                 "Identity.h",
                 "Rating.h"]

    out_dir = "data/output"

    paths = join_dir_and_filenames(in_dir, filenames)

    headers = []
    for path in paths:
        headers.append(create_header(path, out_dir))

    for header in headers:
        print("processing path: " + header["path"] + "...")
        header["ast"] = parser.parse(header["path"], header["sourcecode"])
        write_json(header["ast"], header["out_dir"] + "/" + header["filename"] + ".ast.json")

        simpleAst = SimpleAST()
        header["simple_ast"] = simpleAst.create_simple_ast(header["ast"])
        write_json(header["simple_ast"], header["out_dir"] + "/" + header["filename"] + ".simple_ast.json")


def recursive_query(data, filter, transform=lambda x: x):
    resultset = []

    # decorator just handling exceptions
    def filter_decorator(data):
        try:
            return filter(data)
        except KeyError:
            pass

    # filter current data
    if filter_decorator(data):
        # transform result
        xformed = transform(data)
        if xformed:
            resultset.append(xformed)

    # recurse
    if "children" in data:
        for item in data["children"]:
            childres = recursive_query(item, filter, transform)
            if childres:
                resultset += childres

    return resultset


def main_new():
    parser = ASTParser("/opt/local/libexec/llvm-9.0/lib/libclang.dylib")

    # header = create_header("/Users/heck/local-default/include/pEp/pEpEngine.h", out_dir="./")
    header = create_header("data/input/test_data/main_include.h")

    header["ast"] = parser.parse(header["path"], follow_includes=True)
    write_json(header["ast"], header["out_dir"] + "/" + header["filename"] + ".ast.json")

    # query
    def filter_xzy(item):
        if (item["is_definition"] == False
                and item["kind"] == "CursorKind.STRUCT_DECL"
                ):
            return True

    def xform(item):
        return item

    matches = recursive_query(header["ast"], filter_xzy, xform)
    # matches = list(set(matches))
    write_json(matches, header["out_dir"] + "/" + header["filename"] + ".matches.json")


def main():
    main_old()
    # main_new()


if __name__ == "__main__":
    main()

@@ -0,0 +1,430 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import json

from ast_parser import ASTParser
from simple_ast import SimpleAST


def join_dir_and_filenames(dirname, filenames):
    paths = []
    for basename in filenames:
        path = dirname + basename
        paths.append(path)
    return paths


def read_file(path):
    with open(path) as f:
        file_content = f.read()
    return file_content


# out-dir is in-dir if not specified
def create_header(path, out_dir=None):
    header = {"path": "",
              "dir": "",
              "filename": "",
              "out_dir": "",
              "sourcecode": ""}

    header["path"] = path
    header["dir"] = os.path.dirname(path)
    header["filename"] = os.path.basename(path)

    header["out_dir"] = header["dir"]
    if out_dir:
        header["out_dir"] = out_dir

    header["sourcecode"] = read_file(path)
    return header


def write_json(content, outpath):
    # create path if not existing
    out_dir = os.path.dirname(outpath)
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    # write
    with open(outpath, "w+") as f:
        json.dump(content, f, indent=4)


# generates simple-ast for each header specified, in spec'd out dir.
def main_old():
    parser = ASTParser("/opt/local/libexec/llvm-9.0/lib/libclang.dylib")

    in_dir = r"/Users/heck/local-default/include/pEp/"
    filenames = ["pEpEngine.h",
                 "keymanagement.h",
                 "message_api.h",
                 "message.h",
                 "sync_api.h",
                 "key_reset.h",
                 "Identity.h",
                 "Rating.h"]

    out_dir = "data/output"

    paths = join_dir_and_filenames(in_dir, filenames)

    headers = []
    for path in paths:
        headers.append(create_header(path, out_dir))

    for header in headers:
        print("processing path: " + header["path"] + "...")
        header["ast"] = parser.parse(header["path"], header["sourcecode"])
        write_json(header["ast"], header["out_dir"] + "/" + header["filename"] + ".ast.json")

        simpleAst = SimpleAST()
        header["simple_ast"] = simpleAst.create_simple_ast(header["ast"])
        write_json(header["simple_ast"], header["out_dir"] + "/" + header["filename"] + ".simple_ast.json")


# works on valid json-like structures (lists/dicts)
def recursive_query(data, filter, transform=lambda x: x):
    # all datatypes accepted
    # print(type(data))
    # assert (type(data) in [list, dict, int, str, bool])
    resultset = []

    # decorator just handling exceptions
    def filter_decorator(data):
        try:
            return filter(data)
        except KeyError:
            pass

    # filter current data
    # only dict types are filtered
    if type(data) in [dict]:
        if filter_decorator(data):
            # transform result
            xformed = transform(data)
            if xformed:
                resultset.append(xformed)

    if isinstance(data, dict):
        for item in data.values():
            childres = recursive_query(item, filter, transform)
            if childres:
                resultset += childres
    elif isinstance(data, list):
        for item in data:
            childres = recursive_query(item, filter, transform)
            if childres:
                resultset += childres

    return resultset


def extract_forward_declarations(data):
    def filter(item):
        if (item["is_definition"] == False
                and item["kind"] == "CursorKind.STRUCT_DECL"
                ):
            return True

    def xform(item):
        return item

    return recursive_query(data, filter, xform)


def extract_functions(data, function_names):
    def filter(item):
        if (item["kind"] == "CursorKind.FUNCTION_DECL"
                and item["name"] in function_names
                ):
            return True

    def xform(item):
        return item

    return recursive_query(data, filter, xform)


# just the typenames of all typerefs, no dups
def collect_typerefs(data):
    def filter(item):
        if (item["kind"] == "CursorKind.TYPE_REF"
                ):
            return True

    def xform(item):
        return item["type"]

    results = recursive_query(data, filter, xform)
    no_dups = list(set(results))
    return no_dups


def extract_vars(data, var_names):
    def filter(item):
        if (item["kind"] == "CursorKind.VAR_DECL"
                and item["name"] in var_names
                ):
            return True

    def xform(item):
        return item

    return recursive_query(data, filter, xform)


def extract_types(data):
    def filter(item):
        if (item["kind"] == "CursorKind.TYPEDEF_DECL"
                or item["kind"] == "CursorKind.ENUM_DECL"
                or item["kind"] == "CursorKind.STRUCT_DECL"
                ):
            return True

    def xform(item):
        # ret = {}
        # ret["name"] = item["name"]
        # if "utypekind" in item:
        #     ret["utypekind"] = item["utypekind"]
        # else:
        #     ret["utypekind"] = item["type"]
        ret = item["name"]
        return ret

    return recursive_query(data, filter, xform)


def remove_dup_dicts(arr_of_dicts):
    arr_no_dups = [dict(i) for i in {tuple(d.items()) for d in arr_of_dicts}]
    return arr_no_dups


def resolve_typerefs(ast, typeref_names):
    types = []
    typeref_resolved = []
    typeref_notfound = []

    for typeref_name in typeref_names:
        res = resolve_typeref(ast, typeref_name)
        if not res:
            typeref_notfound.append(typeref_name)
        else:
            types.append(res)
            typeref_resolved.append(typeref_name)

    return (types, typeref_resolved, typeref_notfound)


def resolve_typeref(ast, typeref_name):
    ret = None

    ret = extract_enum_decl(ast, typeref_name)
    if not ret:
        ret = extract_struct_decl(ast, typeref_name)
    if not ret:
        ret = extract_typedef_decl(ast, typeref_name)

    return ret


def extract_enum_decl(ast, name):
    ret = None

    def filter(data):
        if (data["kind"] == "CursorKind.ENUM_DECL"
                and data["type"] == name
                ):
            return True

    res = recursive_query(ast, filter)
    if len(res) > 1:
        assert False, "duplicate definition"
    if len(res) == 1:
        ret = res[0]

    return ret


def extract_struct_decl(ast, name):
    ret = None

    def filter(data):
        if (data["kind"] == "CursorKind.STRUCT_DECL"
                and data["type"] == name
                ):
            return True

    res = recursive_query(ast, filter)
    if len(res) > 1:
        assert False, "duplicate definition"
    if len(res) == 1:
        ret = res[0]

    return ret


def extract_typedef_decl(ast, name):
    ret = None

    def filter(data):
        if (data["kind"] == "CursorKind.TYPEDEF_DECL"
                and data["type"] == name
                ):
            return True

    res = recursive_query(ast, filter)
    if res:
        ret = res.pop()

        if ret["utypekind"] == "Typedef":
            ret = extract_typedef_decl(ast, ret["utype"])
        elif ret["utypekind"] == "Elaborated":
            ret = extract_enum_decl(ast, ret["utype"]) or extract_struct_decl(ast, ret["utype"])

    return ret


# def remove_already_resolved(resolved, typerefs):
#     unresolved = []
#     for typeref in typerefs:
#         def filter(data):
#             if (data["kind"] == "CursorKind.STRUCT_DECL"
#                     or data["kind"] == "CursorKind.ENUM_DECL"  # maybe typedef
#                     and data["type"] == typeref["type"]):
#                 return True
#
#         contained = recursive_query(resolved, filter)
#         if not contained:
#             unresolved.append(typeref)
#     return unresolved


def find_dup_types(data):
    def filter(item):
        if (item["kind"] == "CursorKind.STRUCT_DECL"
                or item["kind"] == "CursorKind.ENUM_DECL"):
            return True

    def xform(item):
        return item["type"]

    all_types = recursive_query(data, filter, xform)
    dups = set()
    for type in all_types:
        if all_types.count(type) > 1:
            dups.add(type)

    return (all_types, list(dups))


def main_new():
    ast_parser = ASTParser("/opt/local/libexec/llvm-9.0/lib/libclang.dylib")

    function_names = {""}
    # function_names.add("func_void_void")
    # function_names.add("func_void_int")
    # function_names.add("func_int_void")
    # function_names.add("func_int_int")
    # function_names.add("func__PS_V")
    # function_names.add("func__E_V")
    # function_names.add("func_TP_V")
    # function_names.add("func_V_TP")
    # function_names.add("func_TP_TP")
    # function_names.add("func_APS_V")
    # function_names.add("func_V_APS")
    # function_names.add("func_APS_APS")
    # function_names.add("func_TPS_V")
    # function_names.add("func_V_TPS")
    # function_names.add("func_V_TPS")
    # function_names.add("func_TPS_TPS")
    # function_names.add("func_TPS_TPS")

    var_names = {""}
    # var_names.add("var_P")
    # var_names.add("var__E")
    # var_names.add("var_E")
    # var_names.add("var_AE")
    # var_names.add("var__PS")
    # var_names.add("var_PS")
    # var_names.add("var_APS")
    # var_names.add("var__CS")
    # var_names.add("var_CS")
    # var_names.add("var_ACS")
    # var_names.add("var__CCS")
    # var_names.add("var_CCS")
    # var_names.add("var_ACCS")
    var_names.add("var__HS")
    # var_names.add("var__NCS")
    # var_names.add("var_HS")
    # var_names.add("var__NSP")
    # var_names.add("var_AHS")
    # var_names.add("var__HHS")
    # var_names.add("var__NHS")
    # var_names.add("var__NNPS")
    # var_names.add("var_HHS")
    # var_names.add("var__NHS1")
    # var_names.add("var__NNCS")
    # var_names.add("var__NENHS")
    # var_names.add("var_ASHS")
    # var_names.add("var__HS1")
    # var_names.add("var_NEHS1")

    # header = create_header("/Users/heck/local-default/include/pEp/pEpEngine.h", out_dir="./")
    header = create_header("data/input/test_data/test_lib.h")

    header["ast"] = ast_parser.parse(header["path"], follow_includes=True)
    write_json(header["ast"], header["out_dir"] + "/" + header["filename"] + ".ast.json")

    # VIEWS
    views = {}
    views["forward_declarations"] = extract_forward_declarations(header["ast"])
    views["types"] = extract_types(header["ast"])
    views["types_count"] = len(views["types"])

    # CDL
    # AST normalizing

    cdl = {"functions": "",
           "vars": "",
           "structs": "",
           "enums": ""}

    # stage 1: extract functions and vars
    cdl["functions"] = extract_functions(header["ast"], function_names)
    cdl["vars"] = extract_vars(header["ast"], var_names)

    # stage 2: collect type refs
    cdl["types_resolved"] = []
    cdl["typerefs_unresolved"] = []
    cdl["typerefs_resolved"] = []
    cdl["typerefs_notfound"] = []

    while True:
        cdl["typerefs_unresolved"] = collect_typerefs(cdl)  # only list of typenames
        cdl["typerefs_unresolved"] = list(set(cdl["typerefs_unresolved"]) - (set(cdl["typerefs_resolved"]).union(set(cdl["typerefs_notfound"]))))
        if len(cdl["typerefs_unresolved"]) <= 0:
            break

        (types_resolved, typerefs_resolved, notfound) = resolve_typerefs(header["ast"], cdl["typerefs_unresolved"])
        cdl["types_resolved"] += types_resolved
        cdl["typerefs_resolved"] += typerefs_resolved
        cdl["typerefs_notfound"] += notfound

    (views["all_types"], views["dup_types"]) = find_dup_types(cdl)

    header["cdl"] = cdl
    write_json(header["cdl"], header["out_dir"] + "/" + header["filename"] + ".cdl.json")

    header["views"] = views
    write_json(header["views"], header["out_dir"] + "/" + header["filename"] + ".views.json")


def main():
    # main_old()
    main_new()


if __name__ == "__main__":
    main()
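
For context, a minimal self-contained sketch of how the filter/transform pattern in recursive_query() is meant to be driven. The mini-AST below is hypothetical illustration data (not from this repo); the filter mirrors the one in extract_forward_declarations.

    # sketch only: standalone re-statement of the recursive_query pattern,
    # restricted to dicts/lists, with a tiny made-up clang-style AST
    def recursive_query(data, filter, transform=lambda x: x):
        resultset = []

        def safe_filter(d):
            try:
                return filter(d)
            except KeyError:
                pass  # nodes missing a key simply don't match

        # only dict nodes are filtered; lists are just traversed
        if isinstance(data, dict) and safe_filter(data):
            xformed = transform(data)
            if xformed:
                resultset.append(xformed)

        if isinstance(data, dict):
            children = data.values()
        elif isinstance(data, list):
            children = data
        else:
            children = []
        for item in children:
            if isinstance(item, (dict, list)):
                resultset += recursive_query(item, filter, transform)
        return resultset


    # hypothetical mini-AST in the same dict shape the extract_* helpers expect
    ast = {"kind": "CursorKind.TRANSLATION_UNIT",
           "children": [
               {"kind": "CursorKind.STRUCT_DECL", "name": "foo", "is_definition": False},
               {"kind": "CursorKind.STRUCT_DECL", "name": "foo", "is_definition": True},
               {"kind": "CursorKind.FUNCTION_DECL", "name": "bar"},
           ]}

    forward_decls = recursive_query(
        ast,
        lambda n: n["kind"] == "CursorKind.STRUCT_DECL" and n["is_definition"] == False,
        lambda n: n["name"])
    print(forward_decls)  # ['foo']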