From b4f28814b7f5cf1d2375963db81f554d470aef83 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C5=99emysl=20Eric=20Janouch?=
Date: Sun, 7 Jan 2024 23:26:05 +0100
Subject: [PATCH] Add a deep tagger in C++

---
 LICENSE                          |   2 +-
 deeptagger/CMakeLists.txt        |  20 +
 deeptagger/FindONNXRuntime.cmake |  11 +
 deeptagger/README.adoc           |  25 ++
 deeptagger/bench.sh              |  38 ++
 deeptagger/deeptagger.cpp        | 707 +++++++++++++++++++++++++++++++
 deeptagger/download.sh           | 161 ++++++++
 7 files changed, 963 insertions(+), 1 deletion(-)
 create mode 100644 deeptagger/CMakeLists.txt
 create mode 100644 deeptagger/FindONNXRuntime.cmake
 create mode 100644 deeptagger/README.adoc
 create mode 100755 deeptagger/bench.sh
 create mode 100644 deeptagger/deeptagger.cpp
 create mode 100755 deeptagger/download.sh

diff --git a/LICENSE b/LICENSE
index 7d13ecd..76b3811 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,4 @@
-Copyright (c) 2023, Přemysl Eric Janouch
+Copyright (c) 2023 - 2024, Přemysl Eric Janouch
 
 Permission to use, copy, modify, and/or distribute this software for any
 purpose with or without fee is hereby granted.
diff --git a/deeptagger/CMakeLists.txt b/deeptagger/CMakeLists.txt
new file mode 100644
index 0000000..9c10bef
--- /dev/null
+++ b/deeptagger/CMakeLists.txt
@@ -0,0 +1,20 @@
+# Ubuntu 20.04 LTS
+cmake_minimum_required (VERSION 3.16)
+project (deeptagger VERSION 0.0.1 LANGUAGES CXX)
+
+# Hint: set ONNXRuntime_ROOT to a directory with a pre-built GitHub release.
+# (Useful for development, otherwise you may need to adjust the rpath.)
+set (CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}")
+
+find_package (ONNXRuntime REQUIRED)
+find_package (PkgConfig REQUIRED)
+pkg_check_modules (GM REQUIRED GraphicsMagick++)
+
+add_executable (deeptagger deeptagger.cpp)
+target_compile_features (deeptagger PRIVATE cxx_std_17)
+target_include_directories (deeptagger PRIVATE
+	${GM_INCLUDE_DIRS} ${ONNXRuntime_INCLUDE_DIRS})
+target_link_directories (deeptagger PRIVATE
+	${GM_LIBRARY_DIRS})
+target_link_libraries (deeptagger PRIVATE
+	${GM_LIBRARIES} ${ONNXRuntime_LIBRARIES})
diff --git a/deeptagger/FindONNXRuntime.cmake b/deeptagger/FindONNXRuntime.cmake
new file mode 100644
index 0000000..902c27d
--- /dev/null
+++ b/deeptagger/FindONNXRuntime.cmake
@@ -0,0 +1,11 @@
+# Public Domain
+
+find_path (ONNXRuntime_INCLUDE_DIRS onnxruntime_c_api.h
+	PATH_SUFFIXES onnxruntime)
+find_library (ONNXRuntime_LIBRARIES NAMES onnxruntime)
+
+include (FindPackageHandleStandardArgs)
+FIND_PACKAGE_HANDLE_STANDARD_ARGS (ONNXRuntime DEFAULT_MSG
+	ONNXRuntime_INCLUDE_DIRS ONNXRuntime_LIBRARIES)
+
+mark_as_advanced (ONNXRuntime_LIBRARIES ONNXRuntime_INCLUDE_DIRS)
diff --git a/deeptagger/README.adoc b/deeptagger/README.adoc
new file mode 100644
index 0000000..8ea83cc
--- /dev/null
+++ b/deeptagger/README.adoc
@@ -0,0 +1,25 @@
+deeptagger
+==========
+
+This is an automatic image tagger/classifier written in C++,
+without using any Python, and primarily targets various anime models.
+
+Unfortunately, you will still need Python and some luck to prepare the models,
+achieved by running download.sh. You will need about 20 gigabytes of space.
+
+Very little effort is made to make this work on non-Unix systems.
+
+Getting this to work
+--------------------
+To build the evaluator, install a C++ compiler, CMake, and development packages
+of GraphicsMagick and ONNX Runtime.
+
+Prebuilt ONNX Runtime can be most conveniently downloaded from
+https://github.com/microsoft/onnxruntime/releases[GitHub releases].
+Remember to install CUDA packages, such as _nvidia-cudnn_ on Debian,
+if you plan on using the GPU-enabled options.
+
+ $ cmake -DONNXRuntime_ROOT=/path/to/onnxruntime -B build
+ $ cmake --build build
+ $ ./download.sh
+ $ build/deeptagger models/deepdanbooru-v3-20211112-sgd-e28.model image.jpg
diff --git a/deeptagger/bench.sh b/deeptagger/bench.sh
new file mode 100755
index 0000000..6b62791
--- /dev/null
+++ b/deeptagger/bench.sh
@@ -0,0 +1,38 @@
+#!/bin/sh -e
+if [ $# -lt 2 ] || ! [ -x "$1" ]
+then
+	echo "Usage: $0 DEEPTAGGER FILE..."
+	echo "Run this after using download.sh, from the same directory."
+	exit 1
+fi
+
+runner=$1
+shift
+log=bench.out
+: >$log
+
+run() {
+	opts=$1 batch=$2 model=$3
+	shift 3
+
+	for i in $(seq 1 3)
+	do
+		start=$(date +%s)
+		"$runner" $opts -b "$batch" -t 0.75 "$model" "$@" >/dev/null || :
+		end=$(date +%s)
+		printf '%s\t%s\t%s\t%s\t%s\n' \
+			"$name" "$model" "$opts" "$batch" "$((end - start))" | tee -a $log
+	done
+}
+
+for model in models/*.model
+do
+	name=$(sed -n 's/^name=//p' "$model")
+	run "" 1 "$model" "$@"
+	run "" 4 "$model" "$@"
+	run "" 16 "$model" "$@"
+
+	run --cpu 1 "$model" "$@"
+	run --cpu 4 "$model" "$@"
+	run --cpu 16 "$model" "$@"
+done
diff --git a/deeptagger/deeptagger.cpp b/deeptagger/deeptagger.cpp
new file mode 100644
index 0000000..27be965
--- /dev/null
+++ b/deeptagger/deeptagger.cpp
@@ -0,0 +1,707 @@
+#include <getopt.h>
+#include <Magick++.h>
+#include <onnxruntime_cxx_api.h>
+#ifdef __APPLE__
+#include <coreml_provider_factory.h>
+#endif
+
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <map>
+#include <regex>
+#include <set>
+#include <stdexcept>
+#include <string>
+#include <tuple>
+#include <vector>
+
+#include <cerrno>
+#include <climits>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+// Global settings, adjusted by command line options in main().
+static struct {
+	bool cpu = false;
+	int debug = 0;
+	long batch = 1;
+	float threshold = 0.1;
+
+	// Execution provider name → Key → Value
+	std::map<std::string, std::map<std::string, std::string>> options;
+} g;
+
+// --- Configuration -----------------------------------------------------------
+
+// Per-model settings, as loaded by read_config().
+// Arguably, input normalization could be incorporated into models instead.
+struct Config {
+	std::string name;
+	enum class Shape {NHWC, NCHW} shape = Shape::NHWC;
+	enum class Channels {RGB, BGR} channels = Channels::RGB;
+	bool normalize = false;
+	enum class Pad {WHITE, EDGE, STRETCH} pad = Pad::WHITE;
+	int size = -1;
+	bool sigmoid = false;
+
+	std::vector<std::string> tags;
+};
+
+// Append the lines of the file at path to tags, stripping any trailing CR.
+static void
+read_tags(const std::string &path, std::vector<std::string> &tags)
+{
+	std::ifstream f(path);
+	f.exceptions(std::ifstream::badbit);
+	if (!f)
+		throw std::runtime_error("cannot read tags");
+
+	std::string line;
+	while (std::getline(f, line)) {
+		if (!line.empty() && line.back() == '\r')
+			line.erase(line.size() - 1);
+		tags.push_back(line);
+	}
+}
+
+// Apply one key/value pair from a model configuration file to config.
+// Throws on unknown keys and on values that cannot be interpreted.
+static void
+read_field(Config &config, std::string key, std::string value)
+{
+	if (key == "name") {
+		config.name = value;
+	} else if (key == "shape") {
+		if (value == "nhwc") config.shape = Config::Shape::NHWC;
+		else if (value == "nchw") config.shape = Config::Shape::NCHW;
+		else throw std::invalid_argument("bad value for: " + key);
+	} else if (key == "channels") {
+		if (value == "rgb") config.channels = Config::Channels::RGB;
+		else if (value == "bgr") config.channels = Config::Channels::BGR;
+		else throw std::invalid_argument("bad value for: " + key);
+	} else if (key == "normalize") {
+		if (value == "true") config.normalize = true;
+		else if (value == "false") config.normalize = false;
+		else throw std::invalid_argument("bad value for: " + key);
+	} else if (key == "pad") {
+		if (value == "white") config.pad = Config::Pad::WHITE;
+		else if (value == "edge") config.pad = Config::Pad::EDGE;
+		else if (value == "stretch") config.pad = Config::Pad::STRETCH;
+		else throw std::invalid_argument("bad value for: " + key);
+	} else if (key == "size") {
+		config.size = std::stoi(value);
+	} else if (key == "interpret") {
+		if (value == "false") config.sigmoid = false;
+		else if (value == "sigmoid") config.sigmoid = true;
+		else throw std::invalid_argument("bad value for: " + key);
+	} else {
+		throw std::invalid_argument("unsupported config key: " + key);
+	}
+}
+
+// Read settings from path into config, and tags from the adjacent .tags file.
+// Lines not in the form of key=value are skipped, "#" introduces comments.
+// This needs regex_search: regex_match would reject trailing comments.
+static void
+read_config(Config &config, const char *path)
+{
+	std::ifstream f(path);
+	f.exceptions(std::ifstream::badbit);
+	if (!f)
+		throw std::runtime_error("cannot read configuration");
+
+	std::regex re(R"(^\s*([^#=]+?)\s*=\s*([^#]*?)\s*(?:#|$))",
+		std::regex::optimize);
+	std::smatch m;
+
+	std::string line;
+	while (std::getline(f, line)) {
+		if (std::regex_search(line, m, re))
+			read_field(config, m[1].str(), m[2].str());
+	}
+
+	read_tags(
+		std::filesystem::path(path).replace_extension("tags"), config.tags);
+}
+
+// --- Data preparation --------------------------------------------------------
+
+// Store pixels as float triplets scaled to the 8-bit range, in the requested
+// channel order, returning a pointer just past the written data.
+static float *
+image_to_nhwc(float *data, Magick::Image &image, Config::Channels channels)
+{
+	unsigned int width = image.columns();
+	unsigned int height = image.rows();
+
+	auto pixels = image.getConstPixels(0, 0, width, height);
+	switch (channels) {
+	case Config::Channels::RGB:
+		for (unsigned int y = 0; y < height; y++) {
+			for (unsigned int x = 0; x < width; x++) {
+				auto pixel = *pixels++;
+				*data++ = ScaleQuantumToChar(pixel.red);
+				*data++ = ScaleQuantumToChar(pixel.green);
+				*data++ = ScaleQuantumToChar(pixel.blue);
+			}
+		}
+		break;
+	case Config::Channels::BGR:
+		for (unsigned int y = 0; y < height; y++) {
+			for (unsigned int x = 0; x < width; x++) {
+				auto pixel = *pixels++;
+				*data++ = ScaleQuantumToChar(pixel.blue);
+				*data++ = ScaleQuantumToChar(pixel.green);
+				*data++ = ScaleQuantumToChar(pixel.red);
+			}
+		}
+	}
+	return data;
+}
+
+// Store pixels as three consecutive float planes scaled to the 8-bit range,
+// in the requested channel order, returning a pointer past the written data.
+static float *
+image_to_nchw(float *data, Magick::Image &image, Config::Channels channels)
+{
+	unsigned int width = image.columns();
+	unsigned int height = image.rows();
+
+	auto pixels = image.getConstPixels(0, 0, width, height), pp = pixels;
+	switch (channels) {
+	case Config::Channels::RGB:
+		for (unsigned int y = 0; y < height; y++)
+			for (unsigned int x = 0; x < width; x++)
+				*data++ = ScaleQuantumToChar((*pp++).red);
+		pp = pixels;
+		for (unsigned int y = 0; y < height; y++)
+			for (unsigned int x = 0; x < width; x++)
+				*data++ = ScaleQuantumToChar((*pp++).green);
+		pp = pixels;
+		for (unsigned int y = 0; y < height; y++)
+			for (unsigned int x = 0; x < width; x++)
+				*data++ = ScaleQuantumToChar((*pp++).blue);
+		break;
+	case Config::Channels::BGR:
+		for (unsigned int y = 0; y < height; y++)
+			for (unsigned int x = 0; x < width; x++)
+				*data++ = ScaleQuantumToChar((*pp++).blue);
+		pp = pixels;
+		for (unsigned int y = 0; y < height; y++)
+			for (unsigned int x = 0; x < width; x++)
+				*data++ = ScaleQuantumToChar((*pp++).green);
+		pp = pixels;
+		for (unsigned int y = 0; y < height; y++)
+			for (unsigned int x = 0; x < width; x++)
+				*data++ = ScaleQuantumToChar((*pp++).red);
+	}
+	return data;
+}
+
+// Read an image and fit it into width x height according to config.pad,
+// composited over a white background.  Magick warnings are not fatal,
+// other exceptions thrown while reading propagate to the caller.
+static Magick::Image
+load(const std::string &filename,
+	const Config &config, int64_t width, int64_t height)
+{
+	Magick::Image image;
+	try {
+		image.read(filename);
+	} catch (const Magick::Warning &warning) {
+		if (g.debug)
+			fprintf(stderr, "%s: %s\n", filename.c_str(), warning.what());
+	}
+
+	image.autoOrient();
+
+	Magick::Geometry adjusted(width, height);
+	switch (config.pad) {
+	case Config::Pad::EDGE:
+	case Config::Pad::WHITE:
+		adjusted.greater(true);
+		break;
+	case Config::Pad::STRETCH:
+		adjusted.aspect(false);
+	}
+
+	image.resize(adjusted, Magick::LanczosFilter);
+
+	// The GraphicsMagick API doesn't offer any good options.
+	if (config.pad == Config::Pad::EDGE) {
+		MagickLib::SetImageVirtualPixelMethod(
+			image.image(), MagickLib::EdgeVirtualPixelMethod);
+
+		auto x = (int64_t(image.columns()) - width) / 2;
+		auto y = (int64_t(image.rows()) - height) / 2;
+		auto source = image.getConstPixels(x, y, width, height);
+		std::vector<MagickLib::PixelPacket>
+			pixels(source, source + width * height);
+
+		Magick::Image edged(Magick::Geometry(width, height), "black");
+		edged.classType(Magick::DirectClass);
+		auto target = edged.setPixels(0, 0, width, height);
+		memcpy(target, pixels.data(), pixels.size() * sizeof pixels[0]);
+		edged.syncPixels();
+
+		image = edged;
+	}
+
+	// Center it in a square patch of white, removing any transparency.
+	// image.extent() could probably be used to do the same thing.
+	Magick::Image white(Magick::Geometry(width, height), "white");
+	auto x = (white.columns() - image.columns()) / 2;
+	auto y = (white.rows() - image.rows()) / 2;
+	white.composite(image, x, y, Magick::OverCompositeOp);
+	white.fileName(filename);
+
+	if (g.debug > 2)
+		white.display();
+
+	return white;
+}
+
+// --- Inference ---------------------------------------------------------------
+
+// Run the model over a batch of preprocessed images, and print
+// "filename TAB score TAB tag" for each tag scoring above g.threshold.
+// The shape is taken by value, its first dimension becomes the batch size.
+static void
+run(std::vector<Magick::Image> &images, const Config &config,
+	Ort::Session &session, std::vector<int64_t> shape)
+{
+	auto batch = shape[0] = images.size();
+
+	Ort::AllocatorWithDefaultOptions allocator;
+	auto tensor = Ort::Value::CreateTensor<float>(
+		allocator, shape.data(), shape.size());
+
+	auto input_len = tensor.GetTensorTypeAndShapeInfo().GetElementCount();
+	auto input_data = tensor.GetTensorMutableData<float>(), pi = input_data;
+	for (int64_t i = 0; i < batch; i++) {
+		switch (config.shape) {
+		case Config::Shape::NCHW:
+			pi = image_to_nchw(pi, images.at(i), config.channels);
+			break;
+		case Config::Shape::NHWC:
+			pi = image_to_nhwc(pi, images.at(i), config.channels);
+		}
+	}
+	if (config.normalize) {
+		pi = input_data;
+		for (size_t i = 0; i < input_len; i++)
+			*pi++ /= 255.0;
+	}
+
+	std::string input_name =
+		session.GetInputNameAllocated(0, allocator).get();
+	std::string output_name =
+		session.GetOutputNameAllocated(0, allocator).get();
+
+	std::vector<const char *> input_names = {input_name.c_str()};
+	std::vector<const char *> output_names = {output_name.c_str()};
+
+	auto outputs = session.Run(Ort::RunOptions{},
+		input_names.data(), &tensor, input_names.size(),
+		output_names.data(), output_names.size());
+	if (outputs.size() != 1 || !outputs[0].IsTensor()) {
+		fprintf(stderr, "Wrong output\n");
+		return;
+	}
+
+	auto output_len = outputs[0].GetTensorTypeAndShapeInfo().GetElementCount();
+	auto output_data = outputs.front().GetTensorData<float>(), po = output_data;
+	if (output_len != batch * config.tags.size()) {
+		fprintf(stderr, "Tags don't match the output\n");
+		return;
+	}
+
+	for (size_t i = 0; i < batch; i++) {
+		for (size_t t = 0; t < config.tags.size(); t++) {
+			float value = *po++;
+			if (config.sigmoid)
+				value = 1 / (1 + std::exp(-value));
+			if (value > g.threshold) {
+				printf("%s\t%.2f\t%s\n", images.at(i).fileName().c_str(),
+					value, config.tags.at(t).c_str());
+			}
+		}
+	}
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+// Remember options for an execution provider, given as "EP;KEY=VALUE;...".
+// Repeated use for the same provider replaces its earlier options.
+static void
+parse_options(const std::string &options)
+{
+	auto semicolon = options.find(';');
+	auto name = options.substr(0, semicolon);
+	// A bare provider name is fine: substr(npos) would throw out_of_range.
+	std::string sequence;
+	if (semicolon != std::string::npos)
+		sequence = options.substr(semicolon);
+
+	std::map<std::string, std::string> kv;
+	std::regex re(R"(;*([^;=]+)=([^;=]+))", std::regex::optimize);
+	std::sregex_iterator it(sequence.begin(), sequence.end(), re), end;
+	for (; it != end; ++it)
+		kv[it->str(1)] = it->str(2);
+	g.options.insert_or_assign(name, std::move(kv));
+}
+
+// Return parallel arrays of option keys and values for the given provider.
+// The pointers refer to strings owned by g.options.
+static std::tuple<std::vector<const char *>, std::vector<const char *>>
+unpack_options(const std::string &provider)
+{
+	std::vector<const char *> keys, values;
+	if (g.options.count(provider)) {
+		for (const auto &kv : g.options.at(provider)) {
+			keys.push_back(kv.first.c_str());
+			values.push_back(kv.second.c_str());
+		}
+	}
+	return {keys, values};
+}
+
+// Register any available accelerated execution providers, unless --cpu
+// was requested.  Failures to append a provider are merely reported.
+static void
+add_providers(Ort::SessionOptions &options)
+{
+	auto api = Ort::GetApi();
+	auto v_providers = Ort::GetAvailableProviders();
+	std::set<std::string> providers(v_providers.begin(), v_providers.end());
+
+	if (g.debug) {
+		printf("Providers:");
+		for (const auto &it : providers)
+			printf(" %s", it.c_str());
+		printf("\n");
+	}
+
+	// There is a string-based AppendExecutionProvider() method,
+	// but it cannot be used with all providers.
+	// TODO: Make it possible to disable providers.
+	// TODO: Providers will deserve some performance tuning.
+
+	if (g.cpu)
+		return;
+
+#ifdef __APPLE__
+	if (providers.count("CoreMLExecutionProvider")) {
+		try {
+			Ort::ThrowOnError(
+				OrtSessionOptionsAppendExecutionProvider_CoreML(options, 0));
+		} catch (const std::exception &e) {
+			fprintf(stderr, "CoreML unavailable: %s\n", e.what());
+		}
+	}
+#endif
+
+#if TENSORRT
+	// TensorRT should be the more performant execution provider, however:
+	//  - it is difficult to set up (needs logging in to download),
+	//  - with WD v1.4 ONNX models, one gets "Your ONNX model has been generated
+	//    with INT64 weights, while TensorRT does not natively support INT64.
+	//    Attempting to cast down to INT32." and that's not nice.
+	if (providers.count("TensorrtExecutionProvider")) {
+		OrtTensorRTProviderOptionsV2* tensorrt_options = nullptr;
+		Ort::ThrowOnError(api.CreateTensorRTProviderOptions(&tensorrt_options));
+		auto [keys, values] = unpack_options("TensorrtExecutionProvider");
+		if (!keys.empty()) {
+			Ort::ThrowOnError(api.UpdateTensorRTProviderOptions(
+				tensorrt_options, keys.data(), values.data(), keys.size()));
+		}
+
+		try {
+			options.AppendExecutionProvider_TensorRT_V2(*tensorrt_options);
+		} catch (const std::exception &e) {
+			fprintf(stderr, "TensorRT unavailable: %s\n", e.what());
+		}
+		api.ReleaseTensorRTProviderOptions(tensorrt_options);
+	}
+#endif
+
+	// See CUDA-ExecutionProvider.html for documentation.
+	if (providers.count("CUDAExecutionProvider")) {
+		OrtCUDAProviderOptionsV2* cuda_options = nullptr;
+		Ort::ThrowOnError(api.CreateCUDAProviderOptions(&cuda_options));
+		auto [keys, values] = unpack_options("CUDAExecutionProvider");
+		if (!keys.empty()) {
+			Ort::ThrowOnError(api.UpdateCUDAProviderOptions(
+				cuda_options, keys.data(), values.data(), keys.size()));
+		}
+
+		try {
+			options.AppendExecutionProvider_CUDA_V2(*cuda_options);
+		} catch (const std::exception &e) {
+			fprintf(stderr, "CUDA unavailable: %s\n", e.what());
+		}
+		api.ReleaseCUDAProviderOptions(cuda_options);
+	}
+
+	if (providers.count("ROCMExecutionProvider")) {
+		OrtROCMProviderOptions rocm_options = {};
+		auto [keys, values] = unpack_options("ROCMExecutionProvider");
+		if (!keys.empty()) {
+			Ort::ThrowOnError(api.UpdateROCMProviderOptions(
+				&rocm_options, keys.data(), values.data(), keys.size()));
+		}
+
+		try {
+			options.AppendExecutionProvider_ROCM(rocm_options);
+		} catch (const std::exception &e) {
+			fprintf(stderr, "ROCM unavailable: %s\n", e.what());
+		}
+	}
+
+	// The CPU provider is the default fallback, if everything else fails.
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+// Render tensor dimensions as "A x B x ...", naming the symbolic ones.
+static std::string
+print_shape(const Ort::ConstTensorTypeAndShapeInfo &info)
+{
+	std::vector<const char *> names(info.GetDimensionsCount());
+	info.GetSymbolicDimensions(names.data(), names.size());
+
+	auto shape = info.GetShape();
+	std::string result;
+	for (size_t i = 0; i < shape.size(); i++) {
+		if (shape[i] < 0)
+			result.append(names.at(i));
+		else
+			result.append(std::to_string(shape[i]));
+		result.append(" x ");
+	}
+	if (!result.empty())
+		result.erase(result.size() - 3);
+	return result;
+}
+
+// Print the names and shapes of all inputs and outputs of the session.
+static void
+print_shapes(const Ort::Session &session)
+{
+	Ort::AllocatorWithDefaultOptions allocator;
+	for (size_t i = 0; i < session.GetInputCount(); i++) {
+		std::string name = session.GetInputNameAllocated(i, allocator).get();
+		auto info = session.GetInputTypeInfo(i);
+		auto shape = print_shape(info.GetTensorTypeAndShapeInfo());
+		printf("Input: %s: %s\n", name.c_str(), shape.c_str());
+	}
+	for (size_t i = 0; i < session.GetOutputCount(); i++) {
+		std::string name = session.GetOutputNameAllocated(i, allocator).get();
+		auto info = session.GetOutputTypeInfo(i);
+		auto shape = print_shape(info.GetTensorTypeAndShapeInfo());
+		printf("Output: %s: %s\n", name.c_str(), shape.c_str());
+	}
+}
+
+// Load the model configured at path (with .onnx and .tags files alongside),
+// and classify the given images, in batches of up to g.batch.
+static void
+infer(Ort::Env &env, const char *path, const std::vector<std::string> &images)
+{
+	Config config;
+	read_config(config, path);
+
+	Ort::SessionOptions session_options;
+	add_providers(session_options);
+
+	Ort::Session session = Ort::Session(env,
+		std::filesystem::path(path).replace_extension("onnx").c_str(),
+		session_options);
+
+	if (g.debug)
+		print_shapes(session);
+
+	if (session.GetInputCount() != 1 || session.GetOutputCount() != 1) {
+		fprintf(stderr, "Invalid input or output shape\n");
+		exit(EXIT_FAILURE);
+	}
+
+	auto input_info = session.GetInputTypeInfo(0);
+	auto shape = input_info.GetTensorTypeAndShapeInfo().GetShape();
+	if (shape.size() != 4) {
+		fprintf(stderr, "Incompatible input tensor format\n");
+		exit(EXIT_FAILURE);
+	}
+	if (shape.at(0) > 1) {
+		fprintf(stderr, "Fixed batching not supported\n");
+		exit(EXIT_FAILURE);
+	}
+	if (shape.at(0) >= 0 && g.batch > 1) {
+		fprintf(stderr, "Requested batching for a non-batching model\n");
+		exit(EXIT_FAILURE);
+	}
+
+	int64_t *height = {}, *width = {}, *channels = {};
+	switch (config.shape) {
+	case Config::Shape::NCHW:
+		channels = &shape[1];
+		height = &shape[2];
+		width = &shape[3];
+		break;
+	case Config::Shape::NHWC:
+		height = &shape[1];
+		width = &shape[2];
+		channels = &shape[3];
+		break;
+	}
+
+	// Variable dimensions don't combine well with batches.
+	if (*height < 0)
+		*height = config.size;
+	if (*width < 0)
+		*width = config.size;
+	if (*channels != 3 || *height < 1 || *width < 1) {
+		fprintf(stderr, "Incompatible input tensor format\n");
+		exit(EXIT_FAILURE);
+	}
+
+	// TODO: Image loading is heavily parallelizable. In theory.
+	std::vector<Magick::Image> batch;
+	for (const auto &filename : images) {
+		Magick::Image image;
+		try {
+			image = load(filename, config, *width, *height);
+		} catch (const std::exception &e) {
+			fprintf(stderr, "%s: %s\n", filename.c_str(), e.what());
+			continue;
+		}
+
+		if (*height != image.rows() || *width != image.columns()) {
+			fprintf(stderr, "%s: %s\n", filename.c_str(), "tensor mismatch");
+			continue;
+		}
+
+		batch.push_back(image);
+		if (batch.size() == g.batch) {
+			run(batch, config, session, shape);
+			batch.clear();
+		}
+	}
+	if (!batch.empty())
+		run(batch, config, session, shape);
+}
+
+int
+main(int argc, char *argv[])
+{
+	auto invocation_name = argv[0];
+	auto print_usage = [=] {
+		fprintf(stderr,
+			"Usage: %s [-b BATCH] [--cpu] [-d] [-o EP;KEY=VALUE...] "
+			"[-t THRESHOLD] MODEL { --pipe | [IMAGE...] }\n", invocation_name);
+	};
+
+	static option opts[] = {
+		{"batch", required_argument, 0, 'b'},
+		{"cpu", no_argument, 0, 'c'},
+		{"debug", no_argument, 0, 'd'},
+		{"help", no_argument, 0, 'h'},
+		{"options", required_argument, 0, 'o'},
+		{"pipe", no_argument, 0, 'p'},
+		{"threshold", required_argument, 0, 't'},
+		{nullptr, 0, 0, 0},
+	};
+
+	bool pipe = false;
+	while (1) {
+		int option_index = 0;
+		auto c = getopt_long(argc, const_cast<char *const *>(argv),
+			"b:cdho:pt:", opts, &option_index);
+		if (c == -1)
+			break;
+
+		char *end = nullptr;
+		switch (c) {
+		case 'b':
+			errno = 0, g.batch = strtol(optarg, &end, 10);
+			if (errno || *end || g.batch < 1 || g.batch > SHRT_MAX) {
+				fprintf(stderr, "Batch size must be a positive number\n");
+				exit(EXIT_FAILURE);
+			}
+			break;
+		case 'c':
+			g.cpu = true;
+			break;
+		case 'd':
+			g.debug++;
+			break;
+		case 'h':
+			print_usage();
+			return 0;
+		case 'o':
+			parse_options(optarg);
+			break;
+		case 'p':
+			pipe = true;
+			break;
+		case 't':
+			errno = 0, g.threshold = strtod(optarg, &end);
+			if (errno || end == optarg || *end || !std::isfinite(g.threshold) ||
+				g.threshold < 0 || g.threshold > 1) {
+				fprintf(stderr, "Threshold must be a number within 0..1\n");
+				exit(EXIT_FAILURE);
+			}
+			break;
+		default:
+			print_usage();
+			return 1;
+		}
+	}
+
+	argv += optind;
+	argc -= optind;
+
+	// TODO: There's actually no need to slurp all the lines up front.
+	std::vector<std::string> paths;
+	if (pipe) {
+		if (argc != 1) {
+			print_usage();
+			return 1;
+		}
+
+		std::string line;
+		while (std::getline(std::cin, line))
+			paths.push_back(line);
+	} else {
+		if (argc < 1) {
+			print_usage();
+			return 1;
+		}
+
+		paths.assign(argv + 1, argv + argc);
+	}
+
+	// XXX: GraphicsMagick initializes signal handlers here,
+	// one needs to use MagickLib::InitializeMagickEx()
+	// with MAGICK_OPT_NO_SIGNAL_HANDER to prevent that.
+	//
+	// ImageMagick conveniently has the opposite default.
+	//
+	// Once processing images in parallel, consider presetting
+	// OMP_NUM_THREADS=1 (GM) and/or MAGICK_THREAD_LIMIT=1 (IM).
+	Magick::InitializeMagick(nullptr);
+
+	OrtLoggingLevel logging = g.debug > 1
+		? ORT_LOGGING_LEVEL_VERBOSE
+		: ORT_LOGGING_LEVEL_WARNING;
+
+	// Creating an environment before initializing providers in order to avoid:
+	// "Attempt to use DefaultLogger but none has been registered."
+	Ort::Env env(logging, invocation_name);
+	infer(env, argv[0], paths);
+	return 0;
+}
diff --git a/deeptagger/download.sh b/deeptagger/download.sh
new file mode 100755
index 0000000..29f651e
--- /dev/null
+++ b/deeptagger/download.sh
@@ -0,0 +1,161 @@
+#!/bin/sh -e
+# Requirements: Python ~ 3.11, curl, unzip, git-lfs, awk
+#
+# This script downloads a bunch of models into the models/ directory,
+# after any necessary transformations to run them using the deeptagger binary.
+#
+# Once it succeeds, feel free to remove everything but *.{model,tags,onnx}
+git lfs install
+mkdir -p models
+cd models
+
+# Create a virtual environment for model conversion.
+#
+# If any of the Python stuff fails,
+# retry from within a Conda environment with a different version of Python.
+export VIRTUAL_ENV="$(pwd)/venv"
+export TF_ENABLE_ONEDNN_OPTS=0
+if ! [ -f "$VIRTUAL_ENV/ready" ]
+then
+	python3 -m venv "$VIRTUAL_ENV"
+	#"$VIRTUAL_ENV/bin/pip3" install tensorflow[and-cuda]
+	"$VIRTUAL_ENV/bin/pip3" install tf2onnx 'deepdanbooru[tensorflow]'
+	touch "$VIRTUAL_ENV/ready"
+fi
+
+status() {
+	echo "$(tput bold)-- $*$(tput sgr0)"
+}
+
+# Using the deepdanbooru package makes it possible to use other models
+# trained with the project.
+deepdanbooru() {
+	local name="$1" url="$2"
+	status "$name"
+
+	local basename="$(basename "$url")"
+	if ! [ -e "$basename" ]
+	then curl -LO "$url"
+	fi
+
+	local modelname="${basename%%.*}"
+	if ! [ -d "$modelname" ]
+	then unzip -d "$modelname" "$basename"
+	fi
+
+	if ! [ -e "$modelname.tags" ]
+	then ln "$modelname/tags.txt" "$modelname.tags"
+	fi
+
+	if ! [ -d "$modelname.saved" ]
+	then "$VIRTUAL_ENV/bin/python3" - "$modelname" "$modelname.saved" <<-'END'
+		import sys
+		import deepdanbooru.project as ddp
+		model = ddp.load_model_from_project(
+			project_path=sys.argv[1], compile_model=False)
+		model.export(sys.argv[2])
+	END
+	fi
+
+	if ! [ -e "$modelname.onnx" ]
+	then "$VIRTUAL_ENV/bin/python3" -m tf2onnx.convert \
+		--saved-model "$modelname.saved" --output "$modelname.onnx"
+	fi
+
+	cat > "$modelname.model" <<-END
+		name=$name
+		shape=nhwc
+		channels=rgb
+		normalize=true
+		pad=edge
+	END
+}
+
+# ONNX preconversions don't have a symbolic first dimension, thus doing our own.
+wd14() {
+	local name="$1" repository="$2"
+	status "$name"
+
+	local modelname="$(basename "$repository")"
+	if ! [ -d "$modelname" ]
+	then git clone "https://huggingface.co/$repository"
+	fi
+
+	# Though link the original export as well.
+	if ! [ -e "$modelname.onnx" ]
+	then ln "$modelname/model.onnx" "$modelname.onnx"
+	fi
+
+	if ! [ -e "$modelname.tags" ]
+	then awk -F, 'NR > 1 { print $2 }' "$modelname/selected_tags.csv" \
+		> "$modelname.tags"
+	fi
+
+	cat > "$modelname.model" <<-END
+		name=$name
+		shape=nhwc
+		channels=bgr
+		normalize=false
+		pad=white
+	END
+
+	if ! [ -e "batch-$modelname.onnx" ]
+	then "$VIRTUAL_ENV/bin/python3" -m tf2onnx.convert \
+		--saved-model "$modelname" --output "batch-$modelname.onnx"
+	fi
+
+	if ! [ -e "batch-$modelname.tags" ]
+	then ln "$modelname.tags" "batch-$modelname.tags"
+	fi
+
+	if ! [ -e "batch-$modelname.model" ]
+	then ln "$modelname.model" "batch-$modelname.model"
+	fi
+}
+
+# These models are an undocumented mess, thus using ONNX preconversions.
+mldanbooru() {
+	local name="$1" basename="$2"
+	status "$name"
+
+	if ! [ -d ml-danbooru-onnx ]
+	then git clone https://huggingface.co/deepghs/ml-danbooru-onnx
+	fi
+
+	local modelname="${basename%%.*}"
+	if ! [ -e "$basename" ]
+	then ln "ml-danbooru-onnx/$basename"
+	fi
+
+	if ! [ -e "$modelname.tags" ]
+	then awk -F, 'NR > 1 { print $1 }' ml-danbooru-onnx/tags.csv \
+		> "$modelname.tags"
+	fi
+
+	cat > "$modelname.model" <<-END
+		name=$name
+		shape=nchw
+		channels=rgb
+		normalize=true
+		pad=stretch
+		size=640
+		interpret=sigmoid
+	END
+}
+
+status "Downloading models, beware that git-lfs doesn't indicate progress"
+
+deepdanbooru DeepDanbooru \
+	'https://github.com/KichangKim/DeepDanbooru/releases/download/v3-20211112-sgd-e28/deepdanbooru-v3-20211112-sgd-e28.zip'
+
+#wd14 'WD v1.4 ViT v1' 'SmilingWolf/wd-v1-4-vit-tagger'
+wd14 'WD v1.4 ViT v2' 'SmilingWolf/wd-v1-4-vit-tagger-v2'
+#wd14 'WD v1.4 ConvNeXT v1' 'SmilingWolf/wd-v1-4-convnext-tagger'
+wd14 'WD v1.4 ConvNeXT v2' 'SmilingWolf/wd-v1-4-convnext-tagger-v2'
+wd14 'WD v1.4 ConvNeXTV2 v2' 'SmilingWolf/wd-v1-4-convnextv2-tagger-v2'
+wd14 'WD v1.4 SwinV2 v2' 'SmilingWolf/wd-v1-4-swinv2-tagger-v2'
+wd14 'WD v1.4 MOAT v2' 'SmilingWolf/wd-v1-4-moat-tagger-v2'
+
+# As suggested by author https://github.com/IrisRainbowNeko/ML-Danbooru-webui
+mldanbooru 'ML-Danbooru Caformer dec-5-97527' 'ml_caformer_m36_dec-5-97527.onnx'
+mldanbooru 'ML-Danbooru TResNet-D 6-30000' 'TResnet-D-FLq_ema_6-30000.onnx'