
rename losing diff sucks, but...

Branch: master
Author: heck, 5 years ago
Commit: 261f0e4ca6

Changed files:
  gen/extract.py   (143 lines removed)
  gen/gen_cdl.py   (430 lines added)

gen/extract.py (deleted)

@@ -1,143 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import json

from ast_parser import ASTParser
from simple_ast import SimpleAST


def join_dir_and_filenames(dirname, filenames):
    paths = []
    for basename in filenames:
        path = dirname + basename
        paths.append(path)
    return paths


def read_file(path):
    with open(path) as f:
        file_content = f.read()
    return file_content


# out-dir is in-dir if not specified
def create_header(path, out_dir=None):
    header = {"path": "",
              "dir": "",
              "filename": "",
              "out_dir": "",
              "sourcecode": ""}
    header["path"] = path
    header["dir"] = os.path.dirname(path)
    header["filename"] = os.path.basename(path)
    header["out_dir"] = header["dir"]
    if out_dir:
        header["out_dir"] = out_dir
    header["sourcecode"] = read_file(path)
    return header


def write_json(content, outpath):
    # create path if not existing
    out_dir = os.path.dirname(outpath)
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    # write
    with open(outpath, "w+") as f:
        json.dump(content, f, indent=4)


# generates simple-ast for each header specified in spec out dir.
def main_old():
    parser = ASTParser("/opt/local/libexec/llvm-9.0/lib/libclang.dylib")
    in_dir = r"/Users/heck/local-default/include/pEp/"
    filenames = ["pEpEngine.h",
                 "keymanagement.h",
                 "message_api.h",
                 "message.h",
                 "sync_api.h",
                 "key_reset.h",
                 "Identity.h",
                 "Rating.h"]
    out_dir = "data/output"

    paths = join_dir_and_filenames(in_dir, filenames)
    headers = []
    for path in paths:
        headers.append(create_header(path, out_dir))

    for header in headers:
        print("processing path: " + header["path"] + "...")
        header["ast"] = parser.parse(header["path"], header["sourcecode"])
        write_json(header["ast"], header["out_dir"] + "/" + header["filename"] + ".ast.json")
        simpleAst = SimpleAST()
        header["simple_ast"] = simpleAst.create_simple_ast(header["ast"])
        write_json(header["simple_ast"], header["out_dir"] + "/" + header["filename"] + ".simple_ast.json")


def recursive_query(data, filter, transform=lambda x: x):
    resultset = []

    # decorator just handling exceptions
    def filter_decorator(data):
        try:
            return filter(data)
        except KeyError:
            pass

    # filter current data
    if filter_decorator(data):
        # transform result
        xformed = transform(data)
        if xformed:
            resultset.append(xformed)
    # recurse
    if "children" in data:
        for item in data["children"]:
            childres = recursive_query(item, filter, transform)
            if childres:
                resultset += childres
    return resultset


def main_new():
    parser = ASTParser("/opt/local/libexec/llvm-9.0/lib/libclang.dylib")
    # header = create_header("/Users/heck/local-default/include/pEp/pEpEngine.h", out_dir="./")
    header = create_header("data/input/test_data/main_include.h")
    header["ast"] = parser.parse(header["path"], follow_includes=True)
    write_json(header["ast"], header["out_dir"] + "/" + header["filename"] + ".ast.json")

    # query
    def filter_xzy(item):
        if (item["is_definition"] == False
                and item["kind"] == "CursorKind.STRUCT_DECL"
        ):
            return True

    def xform(item):
        return item

    matches = recursive_query(header["ast"], filter_xzy, xform)
    # matches = list(set(matches))
    write_json(matches, header["out_dir"] + "/" + header["filename"] + ".matches.json")


def main():
    main_old()
    # main_new()


if __name__ == "__main__":
    main()

gen/gen_cdl.py (new file)

@@ -0,0 +1,430 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import json

from ast_parser import ASTParser
from simple_ast import SimpleAST


def join_dir_and_filenames(dirname, filenames):
    paths = []
    for basename in filenames:
        path = dirname + basename
        paths.append(path)
    return paths


def read_file(path):
    with open(path) as f:
        file_content = f.read()
    return file_content


# out-dir is in-dir if not specified
def create_header(path, out_dir=None):
    header = {"path": "",
              "dir": "",
              "filename": "",
              "out_dir": "",
              "sourcecode": ""}
    header["path"] = path
    header["dir"] = os.path.dirname(path)
    header["filename"] = os.path.basename(path)
    header["out_dir"] = header["dir"]
    if out_dir:
        header["out_dir"] = out_dir
    header["sourcecode"] = read_file(path)
    return header


def write_json(content, outpath):
    # create path if not existing
    out_dir = os.path.dirname(outpath)
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    # write
    with open(outpath, "w+") as f:
        json.dump(content, f, indent=4)


# generates simple-ast for each header specified in spec out dir.
def main_old():
    parser = ASTParser("/opt/local/libexec/llvm-9.0/lib/libclang.dylib")
    in_dir = r"/Users/heck/local-default/include/pEp/"
    filenames = ["pEpEngine.h",
                 "keymanagement.h",
                 "message_api.h",
                 "message.h",
                 "sync_api.h",
                 "key_reset.h",
                 "Identity.h",
                 "Rating.h"]
    out_dir = "data/output"

    paths = join_dir_and_filenames(in_dir, filenames)
    headers = []
    for path in paths:
        headers.append(create_header(path, out_dir))

    for header in headers:
        print("processing path: " + header["path"] + "...")
        header["ast"] = parser.parse(header["path"], header["sourcecode"])
        write_json(header["ast"], header["out_dir"] + "/" + header["filename"] + ".ast.json")
        simpleAst = SimpleAST()
        header["simple_ast"] = simpleAst.create_simple_ast(header["ast"])
        write_json(header["simple_ast"], header["out_dir"] + "/" + header["filename"] + ".simple_ast.json")
# works on valid JSON-like structures (lists/dicts)
def recursive_query(data, filter, transform=lambda x: x):
    # all datatypes accepted
    # print(type(data))
    # assert (type(data) in [list, dict, int, str, bool])
    resultset = []

    # decorator just handling exceptions
    def filter_decorator(data):
        try:
            return filter(data)
        except KeyError:
            pass

    # filter current data
    # only dict types are filtered
    if type(data) in [dict]:
        if filter_decorator(data):
            # transform result
            xformed = transform(data)
            if xformed:
                resultset.append(xformed)
    # recurse into dict values and list items
    if isinstance(data, dict):
        for item in data.values():
            childres = recursive_query(item, filter, transform)
            if childres:
                resultset += childres
    elif isinstance(data, list):
        for item in data:
            childres = recursive_query(item, filter, transform)
            if childres:
                resultset += childres
    return resultset
def extract_forward_declarations(data):
    def filter(item):
        if (item["is_definition"] == False
                and item["kind"] == "CursorKind.STRUCT_DECL"
        ):
            return True

    def xform(item):
        return item

    return recursive_query(data, filter, xform)


def extract_functions(data, function_names):
    def filter(item):
        if (item["kind"] == "CursorKind.FUNCTION_DECL"
                and item["name"] in function_names
        ):
            return True

    def xform(item):
        return item

    return recursive_query(data, filter, xform)


# just the typenames of all typerefs, no dups
def collect_typerefs(data):
    def filter(item):
        if (item["kind"] == "CursorKind.TYPE_REF"
        ):
            return True

    def xform(item):
        return item["type"]

    results = recursive_query(data, filter, xform)
    no_dups = list(set(results))
    return no_dups


def extract_vars(data, var_names):
    def filter(item):
        if (item["kind"] == "CursorKind.VAR_DECL"
                and item["name"] in var_names
        ):
            return True

    def xform(item):
        return item

    return recursive_query(data, filter, xform)


def extract_types(data):
    def filter(item):
        if (item["kind"] == "CursorKind.TYPEDEF_DECL"
                or item["kind"] == "CursorKind.ENUM_DECL"
                or item["kind"] == "CursorKind.STRUCT_DECL"
        ):
            return True

    def xform(item):
        # ret = {}
        # ret["name"] = item["name"]
        # if "utypekind" in item:
        #     ret["utypekind"] = item["utypekind"]
        # else:
        #     ret["utypekind"] = item["type"]
        ret = item["name"]
        return ret

    return recursive_query(data, filter, xform)


def remove_dup_dicts(arr_of_dicts):
    arr_no_dups = [dict(i) for i in {tuple(d.items()) for d in arr_of_dicts}]
    return arr_no_dups
def resolve_typerefs(ast, typeref_names):
    types = []
    typeref_resolved = []
    typeref_notfound = []
    for typeref_name in typeref_names:
        res = resolve_typeref(ast, typeref_name)
        if not res:
            typeref_notfound.append(typeref_name)
        else:
            types.append(res)
            typeref_resolved.append(typeref_name)
    return (types, typeref_resolved, typeref_notfound)


def resolve_typeref(ast, typeref_name):
    ret = None
    ret = extract_enum_decl(ast, typeref_name)
    if not ret:
        ret = extract_struct_decl(ast, typeref_name)
    if not ret:
        ret = extract_typedef_decl(ast, typeref_name)
    return ret


def extract_enum_decl(ast, name):
    ret = None

    def filter(data):
        if (data["kind"] == "CursorKind.ENUM_DECL"
                and data["type"] == name
        ):
            return True

    res = recursive_query(ast, filter)
    if len(res) > 1:
        assert False, "duplicate definition"
    if len(res) == 1:
        ret = res[0]
    return ret


def extract_struct_decl(ast, name):
    ret = None

    def filter(data):
        if (data["kind"] == "CursorKind.STRUCT_DECL"
                and data["type"] == name
        ):
            return True

    res = recursive_query(ast, filter)
    if len(res) > 1:
        assert False, "duplicate definition"
    if len(res) == 1:
        ret = res[0]
    return ret


def extract_typedef_decl(ast, name):
    ret = None

    def filter(data):
        if (data["kind"] == "CursorKind.TYPEDEF_DECL"
                and data["type"] == name
        ):
            return True

    res = recursive_query(ast, filter)
    if res:
        ret = res.pop()
        if ret["utypekind"] == "Typedef":
            # follow chained typedefs down to the underlying type
            ret = extract_typedef_decl(ast, ret["utype"])
        elif ret["utypekind"] == "Elaborated":
            ret = extract_enum_decl(ast, ret["utype"]) or extract_struct_decl(ast, ret["utype"])
    return ret
# def remove_already_resolved(resolved, typerefs):
#     unresolved = []
#     for typeref in typerefs:
#         def filter(data):
#             if (data["kind"] == "CursorKind.STRUCT_DECL"
#                     or data["kind"] == "CursorKind.ENUM_DECL"  # maybe typedef
#                     and data["type"] == typeref["type"]):
#                 return True
#
#         contained = recursive_query(resolved, filter)
#         if not contained:
#             unresolved.append(typeref)
#     return unresolved
def find_dup_types(data):
    def filter(item):
        if (item["kind"] == "CursorKind.STRUCT_DECL"
                or item["kind"] == "CursorKind.ENUM_DECL"):
            return True

    def xform(item):
        return item["type"]

    all_types = recursive_query(data, filter, xform)
    dups = set()
    for type in all_types:
        if all_types.count(type) > 1:
            dups.add(type)
    return (all_types, list(dups))
def main_new():
    ast_parser = ASTParser("/opt/local/libexec/llvm-9.0/lib/libclang.dylib")

    function_names = {""}
    # function_names.add("func_void_void")
    # function_names.add("func_void_int")
    # function_names.add("func_int_void")
    # function_names.add("func_int_int")
    # function_names.add("func__PS_V")
    # function_names.add("func__E_V")
    # function_names.add("func_TP_V")
    # function_names.add("func_V_TP")
    # function_names.add("func_TP_TP")
    # function_names.add("func_APS_V")
    # function_names.add("func_V_APS")
    # function_names.add("func_APS_APS")
    # function_names.add("func_TPS_V")
    # function_names.add("func_V_TPS")
    # function_names.add("func_V_TPS")
    # function_names.add("func_TPS_TPS")
    # function_names.add("func_TPS_TPS")

    var_names = {""}
    # var_names.add("var_P")
    # var_names.add("var__E")
    # var_names.add("var_E")
    # var_names.add("var_AE")
    # var_names.add("var__PS")
    # var_names.add("var_PS")
    # var_names.add("var_APS")
    # var_names.add("var__CS")
    # var_names.add("var_CS")
    # var_names.add("var_ACS")
    # var_names.add("var__CCS")
    # var_names.add("var_CCS")
    # var_names.add("var_ACCS")
    var_names.add("var__HS")
    # var_names.add("var__NCS")
    # var_names.add("var_HS")
    # var_names.add("var__NSP")
    # var_names.add("var_AHS")
    # var_names.add("var__HHS")
    # var_names.add("var__NHS")
    # var_names.add("var__NNPS")
    # var_names.add("var_HHS")
    # var_names.add("var__NHS1")
    # var_names.add("var__NNCS")
    # var_names.add("var__NENHS")
    # var_names.add("var_ASHS")
    # var_names.add("var__HS1")
    # var_names.add("var_NEHS1")

    # header = create_header("/Users/heck/local-default/include/pEp/pEpEngine.h", out_dir="./")
    header = create_header("data/input/test_data/test_lib.h")
    header["ast"] = ast_parser.parse(header["path"], follow_includes=True)
    write_json(header["ast"], header["out_dir"] + "/" + header["filename"] + ".ast.json")

    # VIEWS
    views = {}
    views["forward_declarations"] = extract_forward_declarations(header["ast"])
    views["types"] = extract_types(header["ast"])
    views["types_count"] = len(views["types"])

    # CDL
    # AST normalizing
    cdl = {"functions": "",
           "vars": "",
           "structs": "",
           "enums": ""}

    # stage 1: extract functions and vars
    cdl["functions"] = extract_functions(header["ast"], function_names)
    cdl["vars"] = extract_vars(header["ast"], var_names)

    # stage 2: collect type refs and resolve them until nothing unresolved remains
    cdl["types_resolved"] = []
    cdl["typerefs_unresolved"] = []
    cdl["typerefs_resolved"] = []
    cdl["typerefs_notfound"] = []
    while True:
        cdl["typerefs_unresolved"] = collect_typerefs(cdl)  # only a list of typenames
        cdl["typerefs_unresolved"] = list(
            set(cdl["typerefs_unresolved"]) - (set(cdl["typerefs_resolved"]).union(set(cdl["typerefs_notfound"]))))
        if (len(cdl["typerefs_unresolved"]) <= 0):
            break
        (types_resolved, typerefs_resolved, notfound) = resolve_typerefs(header["ast"], cdl["typerefs_unresolved"])
        cdl["types_resolved"] += types_resolved
        cdl["typerefs_resolved"] += typerefs_resolved
        cdl["typerefs_notfound"] += notfound

    (views["all_types"], views["dup_types"]) = find_dup_types(cdl)

    header["cdl"] = cdl
    write_json(header["cdl"], header["out_dir"] + "/" + header["filename"] + ".cdl.json")
    header["views"] = views
    write_json(header["views"], header["out_dir"] + "/" + header["filename"] + ".views.json")


def main():
    # main_old()
    main_new()


if __name__ == "__main__":
    main()