Rewrite sdn-view AWK core in C++
All checks were successful
Alpine 3.20 Success
Arch Linux AUR Success
OpenBSD 7.5 Success

sdn-view is still slower than I'd like it to be,
just no longer ridiculously so.
This commit is contained in:
Přemysl Eric Janouch 2024-12-28 18:59:45 +01:00
parent 3075d47aeb
commit b070df6010
Signed by: p
GPG Key ID: A0420B94F92B9493
4 changed files with 256 additions and 186 deletions

View File

@ -26,9 +26,14 @@ target_compile_features (${PROJECT_NAME} PUBLIC cxx_std_14)
target_compile_definitions (${PROJECT_NAME} PUBLIC
-DPROJECT_NAME=\"${PROJECT_NAME}\" -DPROJECT_VERSION=\"${PROJECT_VERSION}\")
add_executable (${PROJECT_NAME}-mc-ext ${PROJECT_NAME}-mc-ext.cpp)
target_compile_features (${PROJECT_NAME}-mc-ext PUBLIC cxx_std_17)
include (GNUInstallDirs)
install (TARGETS ${PROJECT_NAME} DESTINATION ${CMAKE_INSTALL_BINDIR})
install (PROGRAMS ${PROJECT_NAME}-install ${PROJECT_NAME}-view
# sdn-mc-ext should be in libexec, but we prefer it in PATH.
install (TARGETS sdn sdn-mc-ext
DESTINATION ${CMAKE_INSTALL_BINDIR})
install (PROGRAMS sdn-install sdn-view
DESTINATION ${CMAKE_INSTALL_BINDIR})
install (FILES sdn.1 sdn-install.1 sdn-view.1
DESTINATION ${CMAKE_INSTALL_MANDIR}/man1)

View File

@ -25,7 +25,7 @@ or as a https://git.janouch.name/p/nixexprs[Nix derivation].
Building
--------
Build dependencies: CMake and/or make, a C++14 compiler, pkg-config +
Build dependencies: CMake and/or make, a C++17 compiler, pkg-config +
Runtime dependencies: ncursesw, libacl (on Linux)
// Working around libasciidoc's missing support for escaping it like \++

222
sdn-mc-ext.cpp Normal file
View File

@ -0,0 +1,222 @@
//
// sdn-mc-ext: Midnight Commander extension file processor
//
// Copyright (c) 2024, Přemysl Eric Janouch <p@janouch.name>
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
//
#include <cstdlib>
#include <cctype>
#include <iostream>
#include <regex>
#include <string>
#include <unordered_map>
#include <vector>
// Trailing return types make C++ syntax suck considerably less
#define fun static auto
using namespace std;
// Return a copy of the string with its letters converted to lowercase.
// It is completely fine if this only modifies ASCII letters.
fun tolower (const string &s) -> string {
	string result;
	result.reserve (s.length ());
	// <cctype> functions are only defined for EOF and values representable
	// as unsigned char, so bytes of non-ASCII text must not be passed
	// as plain (possibly negative) chars.
	for (unsigned char c : s)
		result += static_cast<char> (std::tolower (c));
	return result;
}
// Quote a string so that a POSIX shell reads it back as one literal word:
// the value is wrapped in single quotes, and each embedded single quote
// is replaced with '\'' (close the quotes, escaped quote, reopen the quotes).
fun shell_escape (const string &v) -> string {
	// A linear scan avoids compiling a regular expression on each call.
	string result = "'";
	for (auto c : v) {
		if (c == '\'')
			result += "'\\''";
		else
			result += c;
	}
	result += '\'';
	return result;
}
// Command line arguments, assigned once in main ()
// from TYPE, PATH, BASENAME, DIRNAME and VERB respectively.
string arg_type, arg_path, arg_basename, arg_dirname, arg_verb;
// Configuration read from standard input: section name -> (key -> value).
unordered_map<string, unordered_map<string, string>> sections;
// Expand Midnight Commander extension file %-sequences within a command.
//
// Returns a pair of (kind, command).  The kind is "cd" or "view" when
// the respective sequence has been encountered (the last one wins),
// and it is empty otherwise.  The command is a shell command line
// with file names and variable values inserted shell-escaped.
//
// Prompts and unknown sequences are reported on standard error,
// and make the function return a pair of empty strings.
fun expand_command (string command) -> pair<string, string> {
	regex re_sequence {R"(%(%|[[:alpha:]]*\{([^}]*)\}|[[:alpha:]]+))"};
	regex re_name {R"([^{}]*)"};
	regex re_parameter {R"([^,]+)"};

	// Spaces that directly follow %cd and %view are not part of the command.
	auto skip_spaces = [](string &s) {
		s.erase (0, s.find_first_not_of (' '));
	};

	string kind, out, pipe; smatch m;
	while (regex_search (command, m, re_sequence)) {
		out.append (m.prefix ());

		// Extract everything from the match first: it only holds iterators
		// into command, and reassigning command invalidates all of them.
		auto seq = m.str (1);
		auto argument = m.str (2);
		command = m.suffix ().str ();

		// Strip the braced argument, leaving just the sequence name.
		if (regex_search (seq, m, re_name))
			seq = m.str ();

		if (seq == "%") {
			out += "%";
		} else if (seq == "p") {
			out += shell_escape (arg_basename);
		} else if (seq == "f") {
			out += shell_escape (arg_path);
		} else if (seq == "d") {
			out += shell_escape (arg_dirname);
		} else if (seq == "var") {
			// %var{NAME} or %var{NAME:default}
			string value;
			if (auto colon = argument.find (':'); colon == argument.npos) {
				if (auto v = getenv (argument.c_str ()))
					value = v;
			} else {
				value = argument.substr (colon + 1);
				if (auto v = getenv (argument.substr (0, colon).c_str ()))
					value = v;
			}
			out += shell_escape (value);
		} else if (seq == "cd") {
			kind = seq;
			skip_spaces (command);
		} else if (seq == "view") {
			kind = seq;
			skip_spaces (command);

			// The argument is a comma-separated list of viewer modes.
			sregex_token_iterator it (argument.begin (), argument.end (),
				re_parameter, 0), end;
			for (; it != end; ++it) {
				if (*it == "hex")
					pipe.append (" | od -t x1");
				// more(1) and less(1) either ignore or display this:
				//if (*it == "nroff")
				//	pipe.append (" | col -b");
			}
		} else if (seq == "") {
			// This is the %{Prompt text} form.
			cerr << "sdn-mc-ext: prompting not supported" << endl;
			return {};
		} else {
			cerr << "sdn-mc-ext: unsupported: %" << seq << endl;
			return {};
		}
	}

	// Whatever follows the last sequence still belongs to the command,
	// and any filters need to apply to the command as a whole.
	out.append (command);
	if (!pipe.empty ())
		out = "(" + out + ")" + pipe;
	return {kind, out};
}
// Expand the given command, and write the resulting kind and shell command
// to standard output, each on a line of its own.
fun print_command (string cmd) {
	const auto expanded = expand_command (cmd);
	cout << expanded.first << endl;
	cout << expanded.second << endl;
}
// Decide whether a configuration section applies to the examined file,
// as described by the arg_type and arg_basename globals.
//
//  - Sections with a Directory key never match.
//  - A Type pattern, when present, must be found within arg_type.
//  - Then either a Regex pattern is searched for within arg_basename,
//    or a Shell pattern is compared against it: patterns starting with
//    a period are suffixes, anything else must equal the whole basename.
//  - In the absence of both, the section matches iff arg_type is non-empty.
//
// The respective *IgnoreCase keys make a comparison case-insensitive
// when they are set to "true".
fun section_matches (const unordered_map<string, string> &section) -> bool {
	if (section.count ("Directory"))
		return false;

	// The configuration went through some funky changes;
	// unescape \\ but leave other escapes alone.
	auto filter_re = [](const string &s) {
		string result;
		for (size_t i = 0; i < s.length (); ) {
			auto c = s[i++];
			if (c == '\\' && i < s.length ())
				if (c = s[i++]; c != '\\')
					result += '\\';
			result += c;
		}
		return result;
	};
	auto is_true = [&](const string &name) {
		auto value = section.find (name);
		return value != section.end () && value->second == "true";
	};

	// Patterns come from user configuration, and need not be valid
	// ECMAScript.  Rather than letting an exception abort the whole lookup,
	// report the pattern, and consider it as not matching.
	auto search = [&](const string &subject, const string &pattern,
		const string &ignore_case_key) {
		auto flags = std::regex::ECMAScript;
		if (is_true (ignore_case_key))
			flags |= regex_constants::icase;
		try {
			return regex_search (subject, regex {filter_re (pattern), flags});
		} catch (const regex_error &e) {
			cerr << "sdn-mc-ext: invalid pattern: " << pattern
				<< ": " << e.what () << endl;
			return false;
		}
	};

	if (auto kv = section.find ("Type"); kv != section.end ()) {
		if (!search (arg_type, kv->second, "TypeIgnoreCase"))
			return false;
	}
	if (auto kv = section.find ("Regex"); kv != section.end ())
		return search (arg_basename, kv->second, "RegexIgnoreCase");
	if (auto kv = section.find ("Shell"); kv != section.end ()) {
		auto value = kv->second;
		auto basename = arg_basename;
		if (is_true ("ShellIgnoreCase")) {
			value = tolower (value);
			basename = tolower (basename);
		}
		if (value.empty () || value[0] != '.')
			return value == basename;
		return basename.length () >= value.length () &&
			basename.substr (basename.length () - value.length ()) == value;
	}
	return !arg_type.empty ();
}
// Try to serve the requested verb (arg_verb) from the named section.
//
// When the section has an Include key, its own Open/View/Edit commands
// are dropped, and the keys of the "Include/<name>" section are merged
// over it.  Setting SDN_MC_EXT_DEBUG in the environment dumps the resulting
// section to standard error.
//
// Returns true if the section provided the verb and matched the file,
// in which case the expanded command has been printed.
fun process (const string &section) -> bool {
	// A section header without any entries is not present in the map,
	// treat it as an empty section rather than throwing from at ().
	unordered_map<string, string> full;
	if (auto found = sections.find (section); found != sections.end ())
		full = found->second;

	if (auto include = full.find ("Include"); include != full.end ()) {
		full.erase ("Open");
		full.erase ("View");
		full.erase ("Edit");
		if (auto included = sections.find ("Include/" + include->second);
			included != sections.end ()) {
			for (const auto &kv : included->second)
				full[kv.first] = kv.second;
		}
	}
	if (getenv ("SDN_MC_EXT_DEBUG")) {
		cerr << "[" << section << "]" << endl;
		for (const auto &kv : full)
			cerr << "  " << kv.first << ": " << kv.second << endl;
	}
	if (full.count (arg_verb) && section_matches (full)) {
		print_command (full[arg_verb]);
		return true;
	}
	return false;
}
int main (int argc, char *argv[]) {
if (argc != 6) {
cerr << "Usage: " << argv[0]
<< " TYPE PATH BASENAME DIRNAME VERB < mc.ext.ini" << endl;
return 2;
}
arg_type = argv[1];
arg_path = argv[2], arg_basename = argv[3], arg_dirname = argv[4];
arg_verb = argv[5];
string line, section;
vector<string> order;
regex re_entry {R"(^([-\w]+) *= *(.*)$)"};
smatch m;
while (getline (cin, line)) {
if (line.empty () || line[0] == '#') {
continue;
} else if (auto length = line.length();
line.find_last_of ('[') == 0 &&
line.find_first_of (']') == length - 1) {
order.push_back ((section = line.substr (1, length - 2)));
} else if (regex_match (line, m, re_entry)) {
sections[section][m[1]] = m[2];
}
}
for (const auto &section : order) {
if (section == "mc.ext.ini" ||
section == "Default" ||
section.substr (0, 8) == "Include/")
continue;
if (process (section))
return 0;
}
print_command (sections["Default"][arg_verb]);
return 0;
}

209
sdn-view
View File

@ -14,198 +14,41 @@ if command -v mc >/dev/null
then datadir=$(mc --datadir | sed 's/ (.*)$//')
fi
export SDN_VIEW_CONFIG=
config=
for dir in "$HOME"/.config/mc "$datadir" /etc/mc
do
if [ -n "$dir" -a -f "$dir/mc.ext.ini" ]
then
SDN_VIEW_CONFIG=$dir/mc.ext.ini
config=$dir/mc.ext.ini
break
fi
done
# This is often used in %env{} expansion, so let's be on the same page.
export PAGER=${PAGER:-less}
export MC_EXT_FILENAME=$(realpath "$1")
export MC_EXT_BASENAME=$(basename "$1")
export MC_EXT_CURRENTDIR=$(dirname "$1")
export SDN_VIEW_TYPE=$(file -bz "$1")
process() (awk -f - <<'EOF'
BEGIN {
if (!(Config = ENVIRON["SDN_VIEW_CONFIG"]))
exit
export MC_EXT_CURRENTDIR=$(dirname "$MC_EXT_FILENAME")
output=$(sdn-mc-ext <"$config" "$(file -Lbz "$1")" \
"$MC_EXT_FILENAME" "$MC_EXT_BASENAME" "$MC_EXT_CURRENTDIR" View || :)
kind=$(echo "$output" | sed -n 1p)
command=$(echo "$output" | sed -n 2p)
Verb = "View"
Section = ""
while ((getline < Config) > 0) {
if (/^\s*(#.*)?$/) {
# Skip.
} else if (/^\[[^]]+\]$/) {
Sections[++SectionsLen] = Section = substr($0, 2, length($0) - 2)
} else if (/^[^=]+=[^=]*$/) {
split($0, kv, "=")
Keys[Section, kv[1]] = kv[2]
}
}
Type = ENVIRON["SDN_VIEW_TYPE"]
Path = ENVIRON["MC_EXT_FILENAME"]
Basename = ENVIRON["MC_EXT_BASENAME"]
Dirname = ENVIRON["MC_EXT_CURRENTDIR"]
for (i = 1; i <= SectionsLen; i++) {
if (Sections[i] == "mc.ext.ini" ||
Sections[i] == "Default" ||
Sections[i] ~ /^Include[\/]/)
continue
try(Sections[i])
}
# Not attempting any inclusions here.
print expand_command(Keys["Default", Verb])
}
function try(section, pair, a, key, full, include) {
for (pair in Keys) {
split(pair, a, SUBSEP)
if (a[1] == section)
full[a[2]] = Keys[pair]
}
if ("Include" in full) {
delete full["Open"]
delete full["View"]
delete full["Edit"]
include = "Include/" full["Include"]
for (pair in Keys) {
split(pair, a, SUBSEP)
if (a[1] == include)
full[a[2]] = Keys[pair]
}
}
if (ENVIRON["SDN_VIEW_DEBUG"]) {
print "[" section "]" > "/dev/stderr"
for (key in full)
print " " key ": " full[key] > "/dev/stderr"
}
if (Verb in full && section_matches(full, Type, Basename)) {
print expand_command(full[Verb])
exit
}
}
function shell_escape(string) {
gsub(/'/, "'\\''", string)
return "'" string "'"
}
function expand_command(cmd, toview, out, seq, argument, value, a, pipe) {
out = ""
while (match(cmd, /%[a-zA-Z]*\{[^}]*\}|%[a-zA-Z]+|%%/)) {
out = out substr(cmd, 1, RSTART - 1)
seq = substr(cmd, RSTART + 1, RLENGTH - 1)
cmd = substr(cmd, RSTART + RLENGTH)
argument = ""
if (match(seq, /\{.*\}$/)) {
argument = substr(seq, RSTART + 1, RLENGTH - 2)
seq = substr(seq, 1, RSTART - 1)
}
if (seq == "%") {
out = out "%"
} else if (seq == "p") {
out = out shell_escape(Basename)
} else if (seq == "f") {
out = out shell_escape(Path)
} else if (seq == "d") {
out = out shell_escape(Dirname)
} else if (seq == "view") {
toview = 1
sub(/^ +/, "", cmd)
split(argument, a, /,/)
for (value in a) {
if (a[value] == "hex")
pipe = pipe " | od -t x1"
# more(1) and less(1) either ignore or display this:
#if (a[value] == "nroff")
# pipe = pipe " | col -b"
}
} else if (seq == "var") {
value = ""
if (!match(argument, /:.*/)) {
if (argument in ENVIRON)
value = ENVIRON[argument]
} else {
value = substr(argument, RSTART + 1)
argument = substr(argument, 1, RSTART - 1)
if (argument in ENVIRON)
value = ENVIRON[argument]
}
out = out shell_escape(value)
} else if (seq == "") {
print Config ": prompting not supported" > "/dev/stderr"
return
} else {
print Config ": unsupported: %" seq > "/dev/stderr"
return
}
}
out = out cmd pipe
# While the processing is mostly generic for all verbs,
# we'd have to distinguish non-view commands in this AWK script's output.
if (!toview)
return
# In the case of out == "", we should just explicitly pass it to the pager,
# however it currently mixes with the case of "we can't use this View=".
return out
}
function section_matches(section, type, basename, value) {
if ("Directory" in section)
return 0
if ("Type" in section) {
value = section["Type"]
if ("TypeIgnoreCase" in section &&
section["TypeIgnoreCase"] == "true") {
type = tolower(type)
value = tolower(value)
}
gsub(/\\\\/, "\\", value)
gsub(/\\ /, " ", value)
if (type !~ value)
return 0
}
if ("Regex" in section) {
value = section["Regex"]
if ("RegexIgnoreCase" in section &&
section["RegexIgnoreCase"] == "true") {
basename = tolower(basename)
value = tolower(value)
}
gsub(/\\\\/, "\\", value)
return basename ~ value
} else if ("Shell" in section) {
value = section["Shell"]
if ("RegexIgnoreCase" in section &&
section["ShellIgnoreCase"] == "true") {
basename = tolower(basename)
value = tolower(value)
}
if (value !~ /^[.]/)
return value == basename
return length(basename) >= length(value) &&
substr(basename, length(basename) - length(value) + 1) == value
}
return type != ""
}
EOF
)
command=$(process)
if [ -z "$command" ]
then "$PAGER" -- "$MC_EXT_FILENAME"
else eval "$command" | "$PAGER"
fi
case "$kind" in
view)
if [ -n "$command" ]
then eval "$command" | "$PAGER"
else "$PAGER" -- "$MC_EXT_FILENAME"
fi
;;
'')
if [ -n "$command" ]
then eval "$command"
else "$PAGER" -- "$MC_EXT_FILENAME"
fi
;;
*)
echo "Unsupported: $kind" >&2
exit 1
esac