liberty/tools/cmake-parser.awk

253 lines
5.1 KiB
Awk

# cmake-parser.awk: rudimentary CMake script parser
#
# Copyright (c) 2022, Přemysl Eric Janouch <p@janouch.name>
# SPDX-License-Identifier: 0BSD
#
# Implemented roughly according to the grammar described in cmake-language(7),
# which is self-conflicting, and not an accurate description.
#
# The result of parsing is stored in the case-normalized Command variable,
# and the Args array. These can be used by subsequent scripts.
# Print a non-fatal diagnostic, prefixed with the current file name
# and per-file record number, to standard error.
function warning(message) {
	print (FILENAME ":" FNR ": warning: " message) > "/dev/stderr"
}
# Print an error diagnostic, prefixed with the current file name
# and per-file record number, to standard error, then terminate
# the program with exit status 1.
function fatal(message) {
	print (FILENAME ":" FNR ": fatal error: " message) > "/dev/stderr"
	exit 1
}
# Assert that a parsing step succeeded.
#
# A value that is false and also compares equal to 0 aborts the program
# through fatal(), quoting what is left of the current record.
# Any other value, including an empty string result, is handed back as is.
function expect(v) {
	if (v)
		return v
	if (v == 0)
		fatal("broken expectations at `" $0 "'")
	return v
}
# Consume the fixed string v from the beginning of $0.
#
# Returns 1 and strips the prefix when $0 starts with v,
# returns 0 and leaves $0 alone otherwise.
function literal(v) {
	if (substr($0, 1, length(v)) == v) {
		$0 = substr($0, length(v) + 1)
		return 1
	}
	return 0
}
# Consume a match of the regular expression re, anchored at the beginning
# of $0.
#
# Returns 1 and strips the matched text on success, returns 0 and leaves
# $0 alone otherwise.  RSTART and RLENGTH are set by the underlying match().
function regexp(re) {
	if (match($0, "^" re)) {
		$0 = substr($0, RLENGTH + 1)
		return 1
	}
	return 0
}
# Consume a run of spaces and tabs from the beginning of $0.
#
# Returns 1 when at least one such character was removed, 0 otherwise.
function space() {
	if (!match($0, "^[ \t]+"))
		return 0
	$0 = substr($0, RLENGTH + 1)
	return 1
}
# Read bracket content up to the closing bracket "]" "="*len "]".
#
# Expects $0 to hold whatever followed the opening bracket on its line.
# Further input lines are pulled in with getline as necessary, joined by RS.
# Returns the collected content and leaves the text following the closing
# bracket in $0.  Aborts through fatal() when the input ends first.
#
#   len  the number of "=" characters used in the opening bracket
#   v    local accumulator, not to be passed by callers
function unbracket(len, v) {
	# cmake-language(7): one newline immediately following the opening
	# bracket is ignored, so do not record it as part of the content.
	# An empty remainder cannot contain the closing bracket either.
	if ($0 == "" && getline <= 0)
		fatal("unterminated bracket")

	do {
		if (match($0, "]={" len "}]")) {
			v = v substr($0, 1, RSTART - 1)
			$0 = substr($0, RSTART + RLENGTH)
			return v
		}
		v = v $0 RS
	} while (getline > 0)
	fatal("unterminated bracket")
}
# Consume a bracket comment ("#[[ ... ]]", "#[=[ ... ]=]", ...) starting
# at the beginning of $0, discarding its content.
#
# Returns 1 when a comment was skipped, 0 when $0 doesn't start with one.
function bracket_comment() {
	if (match($0, /^#\[=*\[/)) {
		$0 = substr($0, RSTART + RLENGTH)
		# The opener is "#", "[", the "=" run, and "[" again;
		# RLENGTH still refers to the match above.
		unbracket(RLENGTH - 3)
		return 1
	}
	return 0
}
# Skip trailing whitespace, bracket comments, and a line comment.
#
# Returns 1 when nothing else remains in $0, 0 otherwise.
function line_ending() {
	while (space() || bracket_comment()) {}
	if (/^#/)
		$0 = ""
	# Compare against the empty string explicitly: a remainder that looks
	# like the number zero (e.g., a stray "0") is false in a boolean context,
	# and would otherwise be accepted as an empty line ending.
	return $0 == ""
}
# ------------------------------------------------------------------------------
# While elementary expansion of previously set variables is implementable,
# it doesn't seem to be worth the effort.
# Check an argument for variable references, which are not implemented.
#
# Scans s for "${", "$ENV{" and "$CACHE{", each together with the run
# of backslashes directly in front of it.  When the "$" lands on an odd
# position within such a match, meaning it is preceded by an even number
# of backslashes, a warning is emitted.  The argument is returned
# unchanged in all cases.
#
#   s  the argument text, with escapes still in their retained form
#   v  local scratch copy, not to be passed by callers
function expand(s, v) {
	for (v = s; match(v, /\\*[$](|ENV|CACHE)[{]/);
		v = substr(v, RSTART + RLENGTH)) {
		if (index(substr(v, RSTART), "$") % 2 == 1) {
			warning("variable expansion is not supported: " s)
			return s
		}
	}
	return s
}
# Consume one backslash escape sequence from the beginning of $0.
#
# Returns:
#   0      when $0 doesn't start with a backslash (nothing is consumed),
#   "\t", "\r", "\n" for the respective encoded escapes,
#   the character following the backslash for any other non-alphanumeric,
#   ""     when the backslash was the last character on the line,
#          in which case the next line has been read into $0.
# Aborts through fatal() on a backslash followed by any other letter
# or digit, and on a trailing backslash at the end of input.
#
#   v  local, not to be passed by callers
function escape_sequence( v) {
	if (!literal("\\"))
		return 0
	if (literal("t")) return "\t"
	if (literal("r")) return "\r"
	if (literal("n")) return "\n"
	# escape_semicolon isn't treated any specially here.
	if (regexp("[A-Za-z0-9]"))
		fatal("unsupported escape sequence")
	# Test for emptiness explicitly, so that a remainder which happens
	# to evaluate as the number zero cannot be mistaken for the end of line.
	if ($0 != "") {
		v = substr($0, 1, 1)
		$0 = substr($0, 2)
		return v
	}
	if (getline > 0)
		return ""
	fatal("premature end of file")
}
# Parse a double-quoted argument from the beginning of $0,
# reading further input lines as needed.
#
# Returns 0 when $0 doesn't start with a double quote; otherwise returns
# the argument's content, which may be an empty string.  Some escapes are
# kept in escaped form for later reprocessing: an escaped backslash or "$"
# keeps one leading backslash, and an escaped ";" is stored as two
# backslashes followed by the semicolon.  Line breaks within the argument
# are stored as RS.  Aborts through fatal() when the input ends before
# the closing quote.
#
#   v, unescaped  locals, not to be passed by callers
function quoted_argument( v, unescaped) {
	if (!literal("\""))
		return 0

	v = ""
	while (!literal("\"")) {
		# The comparison must be against the empty string: a line read
		# by getline that looks like the number zero (e.g., "0") is false
		# in a boolean context, and would be dropped as if it were empty.
		if ($0 == "") {
			if (getline <= 0)
				fatal("premature end of file")
			v = v RS
		} else if ((unescaped = escape_sequence())) {
			if (unescaped == "\\" || unescaped == "$")
				v = v "\\"
			else if (unescaped == ";")
				v = v "\\\\"
			v = v unescaped
		} else if (unescaped == "") {
			# quoted_continuation
		} else {
			v = v substr($0, 1, 1)
			$0 = substr($0, 2)
		}
	}
	return v
}
# Resolve the escapes retained in a quoted argument, and append the result
# to the Args array as a single element, incrementing N.
#
# Every backslash followed by a character is replaced by that character.
#
#   expanded  the argument text
#   v         local accumulator, not to be passed by callers
function finalize_quoted(expanded, v) {
	for (; match(expanded, /\\./);
		expanded = substr(expanded, RSTART + RLENGTH)) {
		v = v substr(expanded, 1, RSTART - 1) \
			substr(expanded, RSTART + 1, 1)
	}
	N++
	Args[N] = v expanded
}
# Parse an unquoted argument from the beginning of $0.
#
# Collects runs of characters other than whitespace, parentheses, "#",
# double quotes and backslashes, as well as escape sequences.  An escaped
# backslash, "$" or ";" keeps one leading backslash for later
# reprocessing.  Returns the collected text, which is empty when $0
# doesn't start with anything usable.  Aborts through fatal() on a backslash
# at the end of a line.
#
#   v, unescaped  locals, not to be passed by callers
function unquoted_argument( v, unescaped) {
	for (;;) {
		if (match($0, /^[^[:space:]()#"\\]+/)) {
			v = v substr($0, RSTART, RLENGTH)
			$0 = substr($0, RSTART + RLENGTH)
			continue
		}

		unescaped = escape_sequence()
		if (unescaped) {
			if (unescaped == "\\" || unescaped == "$" || unescaped == ";")
				v = v "\\"
			v = v unescaped
			continue
		}
		if (unescaped == "")
			fatal("unexpected backslash in an unquoted argument")

		# unquoted_legacy is not supported.
		return v
	}
}
# Split an unquoted argument on unescaped semicolons, resolve the escapes
# retained in it, and append each non-empty element to the Args array,
# incrementing N for each.
#
# A backslash followed by a character stands for that character,
# and doesn't split the argument when that character is a semicolon.
#
#   expanded  the argument text
#   v         local accumulator, not to be passed by callers
function finalize_unquoted(expanded, v) {
	while (expanded) {
		if (expanded ~ /^\\./) {
			v = v substr(expanded, 2, 1)
			expanded = substr(expanded, 3)
			continue
		}
		if (expanded !~ /^;/) {
			v = v substr(expanded, 1, 1)
			expanded = substr(expanded, 2)
			continue
		}

		# An element separator: flush what has been collected, if anything.
		if (v)
			Args[++N] = v
		v = ""
		expanded = substr(expanded, 2)
	}
	if (v)
		Args[++N] = v
}
# We keep and reprocess some escape sequences in here.
# Parse a single argument from the beginning of $0, and append it
# to the Args array.
#
# Tries, in order: a bracket argument, which is stored verbatim;
# a quoted argument, which may be empty; and an unquoted argument.
# Returns 1 when an argument was parsed, 0 otherwise.
#
#   arg, expanded, v  locals, not to be passed by callers
function argument( arg, expanded, v) {
	if (regexp("\\[=*\\[")) {
		# RLENGTH covers both "[" characters around the "=" run.
		Args[++N] = unbracket(RLENGTH - 2)
		return 1
	}

	arg = quoted_argument()
	if (arg || arg == "") {
		finalize_quoted(expand(arg))
		return 1
	}

	arg = unquoted_argument()
	if (arg) {
		finalize_unquoted(expand(arg))
		return 1
	}
	return 0
}
# ------------------------------------------------------------------------------
# Consume an identifier (a letter or underscore, followed by any number
# of letters, digits and underscores) from the beginning of $0.
#
# Returns the identifier, or 0 when $0 doesn't start with one.
#
#   v  local, not to be passed by callers
function identifier( v) {
	if (match($0, /^[A-Za-z_][A-Za-z0-9_]*/)) {
		v = substr($0, 1, RLENGTH)
		$0 = substr($0, RLENGTH + 1)
		return v
	}
	return 0
}
# Consume one separator between arguments: a run of spaces and tabs,
# a bracket comment, or the rest of the line including the line break.
#
# In the last case, the next input line is read into $0.
# Returns 1 when a separator was consumed, 0 otherwise.
# Aborts through fatal() when the input ends at a line ending.
function separation() {
	if (space())
		return 1
	if (bracket_comment())
		return 1
	if (!line_ending())
		return 0
	if (getline <= 0)
		fatal("premature end of file")
	return 1
}
# Parse one command invocation starting in the current record.
#
# On success, the lowercased command name is stored in Command,
# its arguments in Args[1..N], and 1 is returned.  Nested parentheses
# are stored as standalone "(" and ")" arguments.  Returns 0, with Command
# set to 0, when the record doesn't begin with an identifier.
# Malformed input aborts the program through expect().
#
#   level  local nesting depth of parentheses, not to be passed by callers
function command_invocation( level) {
	while (space()) {}
	if (!(Command = identifier()))
		return 0
	Command = tolower(Command)
	while (space()) {}

	# Drop the arguments of any previous command.
	for (N in Args)
		delete Args[N]
	N = 0

	expect(literal("("))
	for (;;) {
		while (separation()) {}
		if (literal("(")) {
			level++
			Args[++N] = "("
			continue
		}
		if (literal(")")) {
			# The outermost closing parenthesis ends the invocation.
			if (level == 0)
				break
			level--
			Args[++N] = ")"
			continue
		}

		expect(argument())
		# Arguments need to be separated, except from parentheses.
		if (!/^[()]/)
			expect(separation())
	}
	return 1
}
# Each record holds either the beginning of a command invocation,
# or nothing of interest.  Whatever follows must be a line ending.
{
	command_invocation()
	expect(line_ending())
}