Add a rudimentary CMake script parser
This commit is contained in:
parent
688c458095
commit
af2756ee01
|
@ -52,3 +52,7 @@ foreach (name ${tests})
|
|||
target_link_libraries (test-${name} ${common_libraries})
|
||||
add_test (NAME test-${name} COMMAND test-${name})
|
||||
endforeach ()
|
||||
|
||||
# Smoke-test the AWK-based CMake parser by running it over this very file.
# Uses the NAME/COMMAND signature, like the per-test registrations above.
# LC_ALL=C forces the C locale for awk.
add_test (NAME test-cmake-parser
    COMMAND env LC_ALL=C awk
        -f ${PROJECT_SOURCE_DIR}/tools/cmake-parser.awk
        -f ${PROJECT_SOURCE_DIR}/tools/cmake-dump.awk
        ${CMAKE_CURRENT_LIST_FILE})
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
# cmake-dump.awk: dump parsed CMake scripts as tables
|
||||
#
|
||||
# Copyright (c) 2022, Přemysl Eric Janouch <p@janouch.name>
|
||||
# SPDX-License-Identifier: 0BSD
|
||||
#
|
||||
# Parsed scripts are output in a table, with commands separated using ASCII
|
||||
# Record Separators, and arguments using Unit Separators.
|
||||
#
|
||||
# Example usage: awk -f cmake-parser.awk -f cmake-dump.awk CMakeLists.txt \
|
||||
# | sed 'y/\x1F\x1E\t\n/\t\n /' \
|
||||
# | sed -n '/^project\t\([^\t]*\).*\tVERSION\t\([^\t]*\).*/{s//\1 \2/p;q;}'
|
||||
|
||||
# Return s unchanged, but abort if it contains either of the ASCII control
# characters used to delimit the output table: Record Separator (octal 036)
# or Unit Separator (octal 037).
#
# Octal escapes are used because POSIX awk leaves "\x" escapes undefined.
function sanitize(s) {
    if (s ~ /[\036\037]/)
        fatal("conflicting ASCII control characters found in source")
    return s
}
|
||||
|
||||
# For every record where the parser left a command name in Command, emit
# the name followed by its arguments, joined by Unit Separators (octal 037)
# and terminated by a Record Separator (octal 036).
Command {
    out = sanitize(Command)

    # The parser stores arguments as Args[1], Args[2], ...  A "for (i in Args)"
    # loop visits keys in an unspecified order, which could scramble
    # the arguments, so walk the indices in ascending order instead.
    for (i = 1; i in Args; i++)
        out = out "\037" sanitize(Args[i])

    printf "%s\036", out
}
|
|
@ -0,0 +1,250 @@
|
|||
# cmake-parser.awk: rudimentary CMake script parser
|
||||
#
|
||||
# Copyright (c) 2022, Přemysl Eric Janouch <p@janouch.name>
|
||||
# SPDX-License-Identifier: 0BSD
|
||||
#
|
||||
# Implemented roughly according to the grammar described in cmake-language(7),
|
||||
# which is self-conflicting, and not an accurate description.
|
||||
#
|
||||
# The result of parsing is stored in the case-normalized Command variable,
|
||||
# and the Args array. These can be used by subsequent scripts.
|
||||
|
||||
# Print a non-fatal diagnostic, prefixed with the current file name
# and line number, to standard error.
function warning(message,    where) {
    where = FILENAME ":" FNR
    print where ": warning: " message > "/dev/stderr"
}
|
||||
|
||||
# Print a diagnostic, prefixed with the current file name and line number,
# to standard error, and terminate the script with exit status 1.
function fatal(message,    where) {
    where = FILENAME ":" FNR
    print where ": fatal error: " message > "/dev/stderr"
    exit 1
}
|
||||
|
||||
# Assert that a parsing step succeeded, and pass its result through.
# A numeric zero aborts the script; an empty string is let through,
# because it does not compare equal to 0.
function expect(v) {
    if (v || v != 0)
        return v

    fatal("broken expectations at `" $0 "'")
    return v
}
|
||||
|
||||
# If the unparsed remainder of the line ($0) starts with the string v,
# consume it and return 1.  Otherwise leave $0 untouched and return 0.
function literal(v,    n) {
    n = length(v)
    if (substr($0, 1, n) == v) {
        $0 = substr($0, n + 1)
        return 1
    }
    return 0
}
|
||||
|
||||
# If the unparsed remainder of the line ($0) starts with a match of
# the regular expression re, consume the match and return 1.
# Otherwise leave $0 untouched and return 0.
#
# RSTART and RLENGTH are left as set by match(), and callers may use them.
# The "^" anchor is simply prepended to re.
function regexp(re) {
    if (match($0, "^" re)) {
        $0 = substr($0, RLENGTH + 1)
        return 1
    }
    return 0
}
|
||||
|
||||
# Consume a run of spaces and tabs at the start of the unparsed remainder.
# Returns 1 if anything was consumed, 0 otherwise.
function space(    consumed) {
    consumed = regexp("[ \t]+")
    return consumed
}
|
||||
|
||||
# Read bracket content up to the closing bracket "]", len times "=", "]",
# reading further input lines as needed.  The opening bracket must have
# been consumed already.
#
# Returns the content, with lines joined by RS.  The text following
# the closing bracket is left in $0.  Aborts if the input ends first.
#
# As in cmake-language(7), a newline immediately following the opening
# bracket is not part of the content.
function unbracket(len,    v, first) {
    first = 1
    do {
        if (match($0, "]={" len "}]")) {
            v = v substr($0, 1, RSTART - 1)
            $0 = substr($0, RSTART + RLENGTH)
            return v
        }

        # Nothing after the opening bracket on its own line:
        # drop that one newline, keep all others.
        if (!(first && $0 == ""))
            v = v $0 RS
        first = 0
    } while (getline > 0)
    fatal("unterminated bracket")
}
|
||||
|
||||
# Consume a bracket comment ("#[[ ... ]]", "#[=[ ... ]=]", etc.) at the start
# of the unparsed remainder, possibly spanning several lines.
# Returns 1 if there was one, 0 otherwise.
function bracket_comment(    equals) {
    if (match($0, /^#\[=*\[/)) {
        # The opening is "#", "[", some "=" signs, and "[".
        equals = RLENGTH - 3
        $0 = substr($0, RSTART + RLENGTH)
        unbracket(equals)
        return 1
    }
    return 0
}
|
||||
|
||||
# Consume trailing whitespace, bracket comments, and a line comment.
# Returns 1 when nothing else remains on the current line, 0 otherwise.
function line_ending() {
    while (space() || bracket_comment()) {}
    if (/^#/)
        $0 = ""

    # Compare against the empty string explicitly: a plain truthiness test
    # would take a remainder such as "0" for an empty line.
    return ($0 == "")
}
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
# While elementary expansion of previously set variables is implementable,
|
||||
# it doesn't seem to be worth the effort.
|
||||
# Check s for variable references: ${...}, $ENV{...}, $CACHE{...}.
# Nothing is actually expanded: s is always returned as is, and a warning
# is issued on the first reference that is not escaped with a backslash.
function expand(s,    v) {
    v = s

    # "(ENV|CACHE)?" rather than "(|ENV|CACHE)", because an empty alternative
    # has undefined results in POSIX extended regular expressions.
    while (match(v, /\\*[$](ENV|CACHE)?[{]/)) {
        # The match starts with any preceding backslashes, so the dollar sign
        # being at an odd position means an even number of them,
        # and thus an unescaped reference.
        if (index(substr(v, RSTART), "$") % 2 != 0) {
            warning("variable expansion is not supported: " s)
            return s
        }
        v = substr(v, RSTART + RLENGTH)
    }
    return s
}
|
||||
|
||||
# Consume a backslash escape at the start of the unparsed remainder.
#
# Returns:
#   0     if the remainder does not start with a backslash,
#   the represented character for \t, \r, \n,
#   the escaped character itself for any other non-alphanumeric one,
#   ""    if the backslash was last on its line, in which case the next
#         input line has been read (a line continuation).
#
# Aborts on other alphanumeric escapes, and on end of input after
# a trailing backslash.
function escape_sequence(    v) {
    if (!literal("\\"))
        return 0

    if (literal("t")) return "\t"
    if (literal("r")) return "\r"
    if (literal("n")) return "\n"

    # escape_semicolon isn't treated any specially here.
    if (regexp("[A-Za-z0-9]"))
        fatal("unsupported escape sequence")

    # Test for emptiness explicitly: a remainder that looks like a number
    # equal to zero (e.g. " 0") must not be taken for the end of the line.
    if ($0 != "") {
        v = substr($0, 1, 1)
        $0 = substr($0, 2)
        return v
    }
    if (getline > 0)
        return ""
    fatal("premature end of file")
}
|
||||
|
||||
# Consume a double-quoted argument at the start of the unparsed remainder,
# possibly spanning several lines.
#
# Returns 0 if the remainder does not start with a double quote.
# Otherwise returns the argument's content, which may be an empty string.
# Escaped backslashes and dollar signs are kept prefixed with one backslash,
# and escaped semicolons with two, for later reprocessing by the caller.
#
# Aborts if the input ends before the closing quote.
function quoted_argument(    v, unescaped) {
    if (!literal("\""))
        return 0

    v = ""
    while (!literal("\"")) {
        # Test for emptiness explicitly: an input line consisting of "0"
        # is a numeric string, and "!$0" would silently skip over it.
        if ($0 == "") {
            if (getline <= 0)
                fatal("premature end of file")
            v = v RS
        } else if ((unescaped = escape_sequence())) {
            if (unescaped == "\\" || unescaped == "$")
                v = v "\\"
            else if (unescaped == ";")
                v = v "\\\\"
            v = v unescaped
        } else if (unescaped == "") {
            # quoted_continuation
        } else {
            # Not an escape sequence, take one character literally.
            v = v substr($0, 1, 1)
            $0 = substr($0, 2)
        }
    }
    return v
}
|
||||
|
||||
# Consume an unquoted argument at the start of the unparsed remainder.
#
# Returns the argument's text, or an uninitialized (false) value if there
# was nothing to consume.  Escaped backslashes, dollar signs and semicolons
# are kept prefixed with a backslash, for later reprocessing by the caller.
#
# Aborts on a backslash at the end of a line.
function unquoted_argument(    text, ch) {
    for (;;) {
        # A run of characters that need no special treatment.
        if (match($0, /^[^[:space:]()#"\\]+/)) {
            text = text substr($0, RSTART, RLENGTH)
            $0 = substr($0, RSTART + RLENGTH)
            continue
        }

        ch = escape_sequence()
        if (ch) {
            if (ch == "\\" || ch == "$" || ch == ";")
                text = text "\\"
            text = text ch
            continue
        }

        if (ch == "")
            fatal("unexpected backslash in an unquoted argument")
        else
            return text  # unquoted_legacy is not supported.
    }
}
|
||||
|
||||
# Note that we keep and reprocess some escape sequences in here.
|
||||
# Consume one bracket, quoted, or unquoted argument at the start of
# the unparsed remainder, appending the result to Args (counted by N).
# An unquoted argument is split on unescaped semicolons, and may thus
# produce any number of Args entries, including none.
# Returns 1 if an argument was consumed, 0 otherwise.
function argument(    raw, rest, acc) {
    # bracket_argument: taken verbatim.
    if (regexp("\\[=*\\[")) {
        Args[++N] = unbracket(RLENGTH - 2)
        return 1
    }

    # quoted_argument: resolve the remaining backslash escapes.
    raw = quoted_argument()
    if (raw || raw == "") {
        rest = expand(raw)
        while (match(rest, /\\./)) {
            acc = acc substr(rest, 1, RSTART - 1) \
                substr(rest, RSTART + 1, 1)
            rest = substr(rest, RSTART + RLENGTH)
        }
        Args[++N] = acc rest
        return 1
    }

    # unquoted_argument: additionally split into list items.
    raw = unquoted_argument()
    if (!raw)
        return 0

    rest = expand(raw)
    while (rest) {
        if (rest ~ /^;/) {
            # An unescaped semicolon ends the current item,
            # and empty items are dropped.
            if (acc)
                Args[++N] = acc
            acc = ""
            rest = substr(rest, 2)
        } else if (rest ~ /^\\./) {
            acc = acc substr(rest, 2, 1)
            rest = substr(rest, 3)
        } else {
            acc = acc substr(rest, 1, 1)
            rest = substr(rest, 2)
        }
    }
    if (acc)
        Args[++N] = acc
    return 1
}
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
# Consume an identifier (a letter or underscore, followed by any number of
# letters, digits, and underscores) at the start of the unparsed remainder.
# Returns the identifier, or 0 if there is none.
function identifier(    name) {
    if (match($0, /^[A-Za-z_][A-Za-z0-9_]*/)) {
        name = substr($0, 1, RLENGTH)
        $0 = substr($0, RLENGTH + 1)
        return name
    }
    return 0
}
|
||||
|
||||
# Consume one unit of separation between arguments: a run of whitespace,
# a bracket comment, or the end of the current line, in which case the next
# input line is read.  Returns 1 if anything was consumed, 0 otherwise.
# Aborts if the input ends at a line ending.
function separation() {
    if (space() || bracket_comment())
        return 1

    if (line_ending()) {
        if (getline > 0)
            return 1
        fatal("premature end of file")
    }
    return 0
}
|
||||
|
||||
# Parse one command invocation starting on the current line: an identifier
# followed by a parenthesized list of arguments, which may span many lines.
#
# On success, Command holds the lowercased command name, Args[1..N] holds
# the arguments, with nested parentheses stored as "(" and ")" arguments,
# and 1 is returned.  If the line does not start with an identifier,
# Command is set to a false value, and 0 is returned.
function command_invocation(    level) {
    while (space()) {}
    Command = identifier()
    if (!Command)
        return 0
    while (space()) {}

    Command = tolower(Command)

    # Discard the arguments of any previous command.
    split("", Args)
    N = 0

    expect(literal("("))
    for (;;) {
        while (separation()) {}

        if (literal(")")) {
            # The outermost closing parenthesis ends the command.
            if (!level)
                break
            level--
            Args[++N] = ")"
            continue
        }
        if (literal("(")) {
            level++
            Args[++N] = "("
            continue
        }

        expect(argument())
        # Arguments need to be separated, except from parentheses.
        if (!/^[()]/)
            expect(separation())
    }
    return 1
}
|
||||
|
||||
# Main rule: try to parse a command invocation starting on each input line
# (which may consume further lines), then require that nothing but
# whitespace and comments follows on the line where parsing stopped.
{
    command_invocation()
    expect(line_ending())
}
|
Loading…
Reference in New Issue