Browse Source

libclang based header file parsing (json output for now)

master
heck 5 years ago
parent
commit
3205b9dbe4
  1. 94
      gen/extract.py

94
gen/extract.py

@ -1,10 +1,11 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import glob
import re
import os
import pprint
import json
import clang.cindex
from clang.cindex import CursorKind
def create_paths_list(dirname, filenames):
@ -27,61 +28,92 @@ def read_file(path):
with open(path) as f:
file_content = f.read()
item = {"path": path,
"content": file_content}
"content_orig": file_content}
return item
def extract_functions(file_content):
    """Return every ``DYNAMIC_API`` declaration found in *file_content*.

    A match starts at the literal text ``DYNAMIC_API`` and runs,
    non-greedily and across line breaks (``re.DOTALL``), up to and
    including the next ``);``.

    Args:
        file_content: Source text to scan.

    Returns:
        List of the matched substrings in order of appearance; empty when
        nothing matches.
    """
    # Raw string: in a plain literal "\)" is an invalid escape sequence,
    # which newer Python versions warn about.
    pattern = re.compile(r"DYNAMIC_API.*?\);", re.DOTALL)
    return pattern.findall(file_content)
def write_json(header):
    """Serialize *header* as indented JSON next to its ``outpath``.

    The ``.json`` suffix is appended to ``header["outpath"]`` in place
    before writing, so both the caller's dict and the serialized copy
    carry the suffixed path.

    Args:
        header: Dict with at least an ``"outpath"`` entry; every value must
            be JSON-serializable.
    """
    target = header["outpath"] + ".json"
    header["outpath"] = target
    with open(target, "w+") as out_file:
        json.dump(header, out_file, indent=4)
def process(header, out_dir):
    """Add the output path and the regex-extracted functions to *header*.

    Args:
        header: Dict with a ``"path"`` entry and a ``"content"`` entry
            holding the header text.
        out_dir: Output directory prefix; concatenated as-is, so it must
            already end with a path separator.

    Returns:
        The same *header* dict, with ``"outpath"`` and ``"functions"`` set.
    """
    # Process and create data structure
    print("processing path: " + header.get("path") + "...")
    basename = os.path.basename(header.get("path"))
    # strip suffix, will be added output format dependently
    # (raw string: "\." in a plain literal is an invalid escape sequence)
    basename_minus_suffix = re.sub(r"\..*$", '', basename)
    # add outpath
    header["outpath"] = out_dir + basename_minus_suffix
    # add functions
    header["functions"] = extract_functions(header.get("content"))
    return header


def prepare_header(header, out_dir):
    """Set ``header["outpath"]`` to *out_dir* plus the header's file name.

    Args:
        header: Dict with a ``"path"`` entry.
        out_dir: Output directory prefix; concatenated as-is, so it must
            already end with a path separator.

    Returns:
        The same *header* dict, with ``"outpath"`` set.
    """
    basename = os.path.basename(header.get("path"))
    # add outpath
    header["outpath"] = out_dir + basename
    return header
## PARSE
def clang_parse(filename, content):
    """Parse *content* as C with libclang and return it as nested dicts.

    Args:
        filename: Name under which the source is handed to libclang; also
            passed to ``heckparse`` as the path used to filter cursors.
        content: Source text, supplied to the parser through
            ``unsaved_files`` under *filename*.

    Returns:
        Whatever ``heckparse`` builds for the translation unit's root
        cursor.
    """
    index = clang.cindex.Index.create()
    arguments = ["-x", "c"]
    # arguments = ["-x", "c++", "-D__CODEGEN__"]
    options = clang.cindex.TranslationUnit.PARSE_SKIP_FUNCTION_BODIES

    # Separate name so the `content` parameter is not rebound to a list.
    unsaved_files = [(filename, content)]
    translation_unit = index.parse(filename, unsaved_files=unsaved_files,
                                   args=arguments, options=options)
    return heckparse(translation_unit.cursor, filename)
def get_children_filelocal(cursor, path):
    """Return the direct children of *cursor* whose location is in *path*.

    Children whose location has no file, or whose file name differs from
    *path*, are left out. Order follows ``cursor.get_children()``.
    """
    local_children = []
    for child in cursor.get_children():
        source_file = child.location.file
        if source_file and source_file.name == path:
            local_children.append(child)
    return local_children
def heckparse(cursor, path):
    """Recursively convert *cursor* and its file-local children to a dict.

    Every node carries ``"type"`` (string form of the cursor kind) and
    ``"name"`` (the cursor spelling). Depending on the kind it also gets:

    * ``"proto"``  - display name, for function declarations
    * ``"c_type"`` - type spelling, for parameter declarations
    * ``"value"``  - enum value, for enum constant declarations

    ``"children"`` is present only when at least one child is located in
    *path*, and holds the converted children in order.
    """
    kind = cursor.kind
    node = {"type": str(kind), "name": cursor.spelling}

    if kind == CursorKind.FUNCTION_DECL:
        node["proto"] = cursor.displayname
    if kind == CursorKind.PARM_DECL:
        node["c_type"] = str(cursor.type.spelling)
    if kind == CursorKind.ENUM_CONSTANT_DECL:
        node["value"] = cursor.enum_value

    local_children = get_children_filelocal(cursor, path)
    if local_children:
        node["children"] = [heckparse(child, path) for child in local_children]
    return node
## INIT
def init_libclang(library_file=None):
    """Tell the clang bindings which libclang shared library to use.

    Does nothing when ``clang.cindex.Config.loaded`` is already true, or
    when no *library_file* is given.

    Args:
        library_file: Path to the libclang shared library, or ``None`` to
            leave the bindings' configuration untouched.
    """
    if clang.cindex.Config.loaded:
        return
    if library_file is None:
        # Nothing to configure; do not hand None to set_library_file.
        return
    # print() does not interpolate "%s" itself; format explicitly.
    print("Using libclang from: %s" % library_file)
    clang.cindex.Config.set_library_file(library_file)
def main():
    """Parse the configured headers and write one JSON file per header.

    Input directory, header names, output directory and the libclang
    location are hard-coded below.
    """
    # Blocks until a line is read from stdin before any work starts.
    # NOTE(review): looks like a debugging pause - confirm it is still wanted.
    input()
    init_libclang("/opt/local/libexec/llvm-9.0/lib/libclang.dylib")

    # Input
    prefix = r"/Users/heck/local-default/"
    filenames = ["pEpEngine.h",
                 "keymanagement.h"]
    in_dir = prefix + r"include/pEp/"

    # Output
    out_dir = "data/output/"
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    paths = create_paths_list(in_dir, filenames)
    headers = read_files(paths)
    for header in headers:
        header = prepare_header(header, out_dir)
        print("processing path: " + header.get("path") + "...")
        header["content_parsed"] = clang_parse(header["path"],
                                               header["content_orig"])
        write_json(header)

Loading…
Cancel
Save