From b070df60106e1f2890119c17b12c9f53c932b587 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C5=99emysl=20Eric=20Janouch?= Date: Sat, 28 Dec 2024 18:59:45 +0100 Subject: [PATCH] Rewrite sdn-view AWK core in C++ sdn-view is still slower than I'd like it to be, just no longer ridiculously so. --- CMakeLists.txt | 9 +- README.adoc | 2 +- sdn-mc-ext.cpp | 222 +++++++++++++++++++++++++++++++++++++++++++++++++ sdn-view | 209 ++++++---------------------------------------- 4 files changed, 256 insertions(+), 186 deletions(-) create mode 100644 sdn-mc-ext.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 87a2c7e..d0a6042 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,9 +26,14 @@ target_compile_features (${PROJECT_NAME} PUBLIC cxx_std_14) target_compile_definitions (${PROJECT_NAME} PUBLIC -DPROJECT_NAME=\"${PROJECT_NAME}\" -DPROJECT_VERSION=\"${PROJECT_VERSION}\") +add_executable (${PROJECT_NAME}-mc-ext ${PROJECT_NAME}-mc-ext.cpp) +target_compile_features (${PROJECT_NAME}-mc-ext PUBLIC cxx_std_17) + include (GNUInstallDirs) -install (TARGETS ${PROJECT_NAME} DESTINATION ${CMAKE_INSTALL_BINDIR}) -install (PROGRAMS ${PROJECT_NAME}-install ${PROJECT_NAME}-view +# sdn-mc-ext should be in libexec, but we prefer it in PATH. +install (TARGETS sdn sdn-mc-ext + DESTINATION ${CMAKE_INSTALL_BINDIR}) +install (PROGRAMS sdn-install sdn-view DESTINATION ${CMAKE_INSTALL_BINDIR}) install (FILES sdn.1 sdn-install.1 sdn-view.1 DESTINATION ${CMAKE_INSTALL_MANDIR}/man1) diff --git a/README.adoc b/README.adoc index a5caae2..8789821 100644 --- a/README.adoc +++ b/README.adoc @@ -25,7 +25,7 @@ or as a https://git.janouch.name/p/nixexprs[Nix derivation]. 
Building -------- -Build dependencies: CMake and/or make, a C++14 compiler, pkg-config + +Build dependencies: CMake and/or make, a C++17 compiler, pkg-config + Runtime dependencies: ncursesw, libacl (on Linux) // Working around libasciidoc's missing support for escaping it like \++ diff --git a/sdn-mc-ext.cpp b/sdn-mc-ext.cpp new file mode 100644 index 0000000..9e06760 --- /dev/null +++ b/sdn-mc-ext.cpp @@ -0,0 +1,222 @@ +// +// sdn-mc-ext: Midnight Commander extension file processor +// +// Copyright (c) 2024, Přemysl Eric Janouch +// +// Permission to use, copy, modify, and/or distribute this software for any +// purpose with or without fee is hereby granted. +// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +// + +#include +#include +#include +#include +#include +#include +#include + +// Trailing return types make C++ syntax suck considerably less +#define fun static auto + +using namespace std; + +// It is completely fine if this only modifies ASCII letters. 
// Lower-case a copy of the string, one byte at a time.
static auto tolower (const std::string &s) -> std::string {
	std::string result;
	result.reserve (s.length ());
	// The C function has undefined behaviour for negative arguments other
	// than EOF, which is what bytes above 0x7F turn into where char is signed,
	// so iterate over unsigned values.
	for (unsigned char c : s)
		result += static_cast<char> (std::tolower (c));
	return result;
}

// Quote a string so that a POSIX shell reads it back as one literal word:
// wrap it in single quotes and spell each embedded single quote as '\''.
static auto shell_escape (const std::string &v) -> std::string {
	std::string result = "'";
	for (char c : v) {
		if (c == '\'')
			result += "'\\''";
		else
			result += c;
	}
	result += '\'';
	return result;
}

// Command line arguments, filled in by main():
// file(1) description, full path, basename, directory name, requested verb.
std::string arg_type, arg_path, arg_basename, arg_dirname, arg_verb;

// Parsed configuration: section name -> (key -> value).
std::unordered_map<std::string,
	std::unordered_map<std::string, std::string>> sections;

// Expand the %-sequences of an mc.ext.ini command line.
//
// Supported sequences: %% (literal percent sign), %p (basename), %f (path),
// %d (directory name), %var{NAME} and %var{NAME:default} (environment
// variable with an optional fallback), %cd, and %view{...}.
// File names and variable values are inserted shell-escaped.
// Spaces directly following %cd and %view{...} are dropped.
// The "hex" parameter of %view pipes the command's output through od(1).
//
// Returns {kind, shell command}, where kind is "view", "cd",
// or an empty string for a plain command. A prompt (%{...}) or any other
// sequence is reported on standard error, and yields two empty strings.
static auto expand_command (std::string command)
	-> std::pair<std::string, std::string> {
	const std::regex re_sequence
		{R"(%(%|[[:alpha:]]*\{([^}]*)\}|[[:alpha:]]+))"};
	// Comma-separated %view{} parameters.
	const std::regex re_parameter {R"([^,]+)"};

	const auto skip_spaces = [] (std::string &s) {
		s.erase (0, s.find_first_not_of (' '));
	};

	std::string kind, out, pipe;
	std::smatch m;
	while (std::regex_search (command, m, re_sequence)) {
		// The match refers into "command" by iterators, so everything
		// must be copied out before the string is reassigned below.
		out += m.prefix ().str ();
		const std::string sequence = m.str (1);
		const std::string argument = m.str (2);
		command = m.suffix ().str ();

		// Reduce "name{argument}" to "name"; a bare "{...}" gives "".
		const std::string seq =
			sequence.substr (0, sequence.find_first_of ("{}"));

		if (seq == "%") {
			out += "%";
		} else if (seq == "p") {
			out += shell_escape (arg_basename);
		} else if (seq == "f") {
			out += shell_escape (arg_path);
		} else if (seq == "d") {
			out += shell_escape (arg_dirname);
		} else if (seq == "var") {
			// The text after the first colon is the fallback value,
			// the environment takes precedence whenever the variable is set.
			const auto colon = argument.find (':');
			std::string value;
			if (colon != std::string::npos)
				value = argument.substr (colon + 1);
			if (const char *v =
				std::getenv (argument.substr (0, colon).c_str ()))
				value = v;
			out += shell_escape (value);
		} else if (seq == "cd") {
			kind = seq;
			skip_spaces (command);
		} else if (seq == "view") {
			kind = seq;
			skip_spaces (command);

			std::sregex_token_iterator
				it (argument.begin (), argument.end (), re_parameter), end;
			for (; it != end; ++it) {
				if (*it == "hex")
					pipe.append (" | od -t x1");

				// more(1) and less(1) either ignore or display this:
				//if (*it == "nroff")
				//	pipe.append (" | col -b");
			}
		} else if (seq == "") {
			std::cerr << "sdn-mc-ext: prompting not supported" << std::endl;
			return {};
		} else {
			std::cerr << "sdn-mc-ext: unsupported: %" << seq << std::endl;
			return {};
		}
	}

	// Whatever follows the last sequence belongs to the command as well,
	// whether or not its output is going to be piped any further.
	out += command;
	if (!pipe.empty ())
		out = "(" + out + ")" + pipe;
	return {kind, out};
}

// Expand a command, and print its kind and the resulting shell command,
// each on a line of its own, to standard output.
static auto print_command (std::string cmd) {
	const auto [kind, command] = expand_command (std::move (cmd));
	std::cout << kind << '\n' << command << std::endl;
}

// The configuration went through some funky changes;
// unescape \\ but leave other escapes alone.
static auto unescape_backslashes (const std::string &s) -> std::string {
	std::string result;
	for (std::size_t i = 0; i < s.length (); ) {
		char c = s[i++];
		if (c == '\\' && i < s.length ()) {
			c = s[i++];
			if (c != '\\')
				result += '\\';
		}
		result += c;
	}
	return result;
}

// Search the subject for a regular expression taken from the configuration.
// An expression that std::regex rejects is reported on standard error
// and counts as a non-match, so that one bad section cannot abort the run.
static auto config_regex_search (const std::string &subject,
	const std::string &pattern, bool ignore_case) -> bool {
	auto flags = std::regex::ECMAScript;
	if (ignore_case)
		flags |= std::regex::icase;
	try {
		return std::regex_search (subject,
			std::regex {unescape_backslashes (pattern), flags});
	} catch (const std::regex_error &e) {
		std::cerr << "sdn-mc-ext: " << pattern << ": "
			<< e.what () << std::endl;
		return false;
	}
}

// Tell whether a configuration section applies to the file described
// by arg_type and arg_basename:
//  - sections with a Directory key never match,
//  - Type, if present, must be found within the file(1) description,
//  - then Regex is searched for within the basename, or else Shell is either
//    compared against the whole basename, or, when it starts with a period,
//    against its ending,
//  - a section with neither matches any file with a non-empty description.
// The TypeIgnoreCase, RegexIgnoreCase and ShellIgnoreCase keys make
// the respective comparison case-insensitive when set to "true".
static auto section_matches (
	const std::unordered_map<std::string, std::string> &section) -> bool {
	if (section.count ("Directory"))
		return false;

	const auto is_true = [&] (const std::string &name) {
		const auto value = section.find (name);
		return value != section.end () && value->second == "true";
	};

	if (const auto type = section.find ("Type"); type != section.end ()) {
		if (!config_regex_search (arg_type,
			type->second, is_true ("TypeIgnoreCase")))
			return false;
	}
	if (const auto re = section.find ("Regex"); re != section.end ()) {
		return config_regex_search (arg_basename,
			re->second, is_true ("RegexIgnoreCase"));
	}
	if (const auto shell = section.find ("Shell"); shell != section.end ()) {
		auto value = shell->second;
		auto basename = arg_basename;
		if (is_true ("ShellIgnoreCase")) {
			value = tolower (value);
			basename = tolower (basename);
		}
		if (value.empty () || value[0] != '.')
			return value == basename;
		return basename.length () >= value.length () &&
			basename.compare (basename.length () - value.length (),
				value.length (), value) == 0;
	}
	return !arg_type.empty ();
}

// Try a single named section: merge in the [Include/...] section it refers to,
// if any, and when the result both defines arg_verb and matches the file,
// print the expanded command. Returns whether a command has been printed.
// The section must exist, or std::out_of_range is thrown.
static auto process (const std::string &section) -> bool {
	// Work on a copy, the merge must not leak back into the configuration.
	auto full = sections.at (section);
	if (const auto include = full.find ("Include"); include != full.end ()) {
		// An Include replaces any actions the section defines by itself.
		full.erase ("Open");
		full.erase ("View");
		full.erase ("Edit");

		if (const auto included = sections.find ("Include/" + include->second);
			included != sections.end ()) {
			for (const auto &kv : included->second)
				full[kv.first] = kv.second;
		}
	}
	if (std::getenv ("SDN_MC_EXT_DEBUG")) {
		std::cerr << "[" << section << "]" << std::endl;
		for (const auto &kv : full)
			std::cerr << "  " << kv.first << ": " << kv.second << std::endl;
	}
	if (full.count (arg_verb) && section_matches (full)) {
		print_command (full[arg_verb]);
		return true;
	}
	return false;
}
("Edit"); + + if (auto included = sections.find ("Include/" + include->second); + included != sections.end ()) { + for (const auto &kv : included->second) + full[kv.first] = kv.second; + } + } + if (getenv ("SDN_MC_EXT_DEBUG")) { + cerr << "[" << section << "]" << endl; + for (const auto &kv : full) + cerr << " " << kv.first << ": " << kv.second << endl; + } + if (full.count (arg_verb) && section_matches (full)) { + print_command (full[arg_verb]); + return true; + } + return false; +} + +int main (int argc, char *argv[]) { + if (argc != 6) { + cerr << "Usage: " << argv[0] + << " TYPE PATH BASENAME DIRNAME VERB < mc.ext.ini" << endl; + return 2; + } + + arg_type = argv[1]; + arg_path = argv[2], arg_basename = argv[3], arg_dirname = argv[4]; + arg_verb = argv[5]; + + string line, section; + vector order; + regex re_entry {R"(^([-\w]+) *= *(.*)$)"}; + smatch m; + while (getline (cin, line)) { + if (line.empty () || line[0] == '#') { + continue; + } else if (auto length = line.length(); + line.find_last_of ('[') == 0 && + line.find_first_of (']') == length - 1) { + order.push_back ((section = line.substr (1, length - 2))); + } else if (regex_match (line, m, re_entry)) { + sections[section][m[1]] = m[2]; + } + } + for (const auto §ion : order) { + if (section == "mc.ext.ini" || + section == "Default" || + section.substr (0, 8) == "Include/") + continue; + if (process (section)) + return 0; + } + print_command (sections["Default"][arg_verb]); + return 0; +} diff --git a/sdn-view b/sdn-view index b53db49..d698e53 100755 --- a/sdn-view +++ b/sdn-view @@ -14,198 +14,41 @@ if command -v mc >/dev/null then datadir=$(mc --datadir | sed 's/ (.*)$//') fi -export SDN_VIEW_CONFIG= +config= for dir in "$HOME"/.config/mc "$datadir" /etc/mc do if [ -n "$dir" -a -f "$dir/mc.ext.ini" ] then - SDN_VIEW_CONFIG=$dir/mc.ext.ini + config=$dir/mc.ext.ini break fi done +# This is often used in %env{} expansion, so let's be on the same page. 
export PAGER=${PAGER:-less} + export MC_EXT_FILENAME=$(realpath "$1") export MC_EXT_BASENAME=$(basename "$1") -export MC_EXT_CURRENTDIR=$(dirname "$1") -export SDN_VIEW_TYPE=$(file -bz "$1") -process() (awk -f - <<'EOF' -BEGIN { - if (!(Config = ENVIRON["SDN_VIEW_CONFIG"])) - exit +export MC_EXT_CURRENTDIR=$(dirname "$MC_EXT_FILENAME") +output=$(sdn-mc-ext <"$config" "$(file -Lbz "$1")" \ + "$MC_EXT_FILENAME" "$MC_EXT_BASENAME" "$MC_EXT_CURRENTDIR" View || :) +kind=$(echo "$output" | sed -n 1p) +command=$(echo "$output" | sed -n 2p) - Verb = "View" - Section = "" - while ((getline < Config) > 0) { - if (/^\s*(#.*)?$/) { - # Skip. - } else if (/^\[[^]]+\]$/) { - Sections[++SectionsLen] = Section = substr($0, 2, length($0) - 2) - } else if (/^[^=]+=[^=]*$/) { - split($0, kv, "=") - Keys[Section, kv[1]] = kv[2] - } - } - - Type = ENVIRON["SDN_VIEW_TYPE"] - Path = ENVIRON["MC_EXT_FILENAME"] - Basename = ENVIRON["MC_EXT_BASENAME"] - Dirname = ENVIRON["MC_EXT_CURRENTDIR"] - - for (i = 1; i <= SectionsLen; i++) { - if (Sections[i] == "mc.ext.ini" || - Sections[i] == "Default" || - Sections[i] ~ /^Include[\/]/) - continue - try(Sections[i]) - } - - # Not attempting any inclusions here. 
- print expand_command(Keys["Default", Verb]) -} - -function try(section, pair, a, key, full, include) { - for (pair in Keys) { - split(pair, a, SUBSEP) - if (a[1] == section) - full[a[2]] = Keys[pair] - } - if ("Include" in full) { - delete full["Open"] - delete full["View"] - delete full["Edit"] - include = "Include/" full["Include"] - for (pair in Keys) { - split(pair, a, SUBSEP) - if (a[1] == include) - full[a[2]] = Keys[pair] - } - } - if (ENVIRON["SDN_VIEW_DEBUG"]) { - print "[" section "]" > "/dev/stderr" - for (key in full) - print " " key ": " full[key] > "/dev/stderr" - } - if (Verb in full && section_matches(full, Type, Basename)) { - print expand_command(full[Verb]) - exit - } -} - -function shell_escape(string) { - gsub(/'/, "'\\''", string) - return "'" string "'" -} - -function expand_command(cmd, toview, out, seq, argument, value, a, pipe) { - out = "" - while (match(cmd, /%[a-zA-Z]*\{[^}]*\}|%[a-zA-Z]+|%%/)) { - out = out substr(cmd, 1, RSTART - 1) - seq = substr(cmd, RSTART + 1, RLENGTH - 1) - cmd = substr(cmd, RSTART + RLENGTH) - - argument = "" - if (match(seq, /\{.*\}$/)) { - argument = substr(seq, RSTART + 1, RLENGTH - 2) - seq = substr(seq, 1, RSTART - 1) - } - - if (seq == "%") { - out = out "%" - } else if (seq == "p") { - out = out shell_escape(Basename) - } else if (seq == "f") { - out = out shell_escape(Path) - } else if (seq == "d") { - out = out shell_escape(Dirname) - } else if (seq == "view") { - toview = 1 - - sub(/^ +/, "", cmd) - split(argument, a, /,/) - for (value in a) { - if (a[value] == "hex") - pipe = pipe " | od -t x1" - - # more(1) and less(1) either ignore or display this: - #if (a[value] == "nroff") - # pipe = pipe " | col -b" - } - } else if (seq == "var") { - value = "" - if (!match(argument, /:.*/)) { - if (argument in ENVIRON) - value = ENVIRON[argument] - } else { - value = substr(argument, RSTART + 1) - argument = substr(argument, 1, RSTART - 1) - if (argument in ENVIRON) - value = ENVIRON[argument] - } - out = out 
shell_escape(value) - } else if (seq == "") { - print Config ": prompting not supported" > "/dev/stderr" - return - } else { - print Config ": unsupported: %" seq > "/dev/stderr" - return - } - } - out = out cmd pipe - - # While the processing is mostly generic for all verbs, - # we'd have to distinguish non-view commands in this AWK script's output. - if (!toview) - return - - # In the case of out == "", we should just explicitly pass it to the pager, - # however it currently mixes with the case of "we can't use this View=". - return out -} - -function section_matches(section, type, basename, value) { - if ("Directory" in section) - return 0 - - if ("Type" in section) { - value = section["Type"] - if ("TypeIgnoreCase" in section && - section["TypeIgnoreCase"] == "true") { - type = tolower(type) - value = tolower(value) - } - gsub(/\\\\/, "\\", value) - gsub(/\\ /, " ", value) - if (type !~ value) - return 0 - } - if ("Regex" in section) { - value = section["Regex"] - if ("RegexIgnoreCase" in section && - section["RegexIgnoreCase"] == "true") { - basename = tolower(basename) - value = tolower(value) - } - gsub(/\\\\/, "\\", value) - return basename ~ value - } else if ("Shell" in section) { - value = section["Shell"] - if ("RegexIgnoreCase" in section && - section["ShellIgnoreCase"] == "true") { - basename = tolower(basename) - value = tolower(value) - } - if (value !~ /^[.]/) - return value == basename - return length(basename) >= length(value) && - substr(basename, length(basename) - length(value) + 1) == value - } - return type != "" -} -EOF -) -command=$(process) -if [ -z "$command" ] -then "$PAGER" -- "$MC_EXT_FILENAME" -else eval "$command" | "$PAGER" -fi +case "$kind" in +view) + if [ -n "$command" ] + then eval "$command" | "$PAGER" + else "$PAGER" -- "$MC_EXT_FILENAME" + fi + ;; +'') + if [ -n "$command" ] + then eval "$command" + else "$PAGER" -- "$MC_EXT_FILENAME" + fi + ;; +*) + echo "Unsupported: $kind" >&2 + exit 1 +esac