Compare commits

...

10 Commits

Author SHA1 Message Date
181ab5a8e7 Optimize /api/similar
All checks were successful
Alpine 3.20 Success
Debian Bookworm Success
2024-12-29 18:10:15 +01:00
fd192310c7 Add a forget function to dispose of orphans
All checks were successful
Alpine 3.20 Success
Debian Bookworm Success
Previously, there was no way of removing images from the database.
2024-12-29 16:22:50 +01:00
b73e0b4622 Order orphans by path
All checks were successful
Alpine 3.20 Success
Debian Bookworm Success
It costs more cycles, but the SHA-1 they got implicitly ordered by
is pseudo-random.
2024-12-29 14:47:11 +01:00
0530c5d95f Fix /api/orphans with removed parent nodes 2024-12-29 14:17:07 +01:00
ce2e58b6bc Fix extremely slow removals 2024-12-29 13:41:07 +01:00
ca462ac005 Remember to optimize the database 2024-12-29 12:32:44 +01:00
e895beadb7 Add a check option to garbage collect DB files
All checks were successful
Alpine 3.20 Success
Debian Bookworm Success
2024-12-21 12:18:54 +01:00
615af97043 Add a sync option to exclude paths by regexp 2024-12-21 11:12:00 +01:00
595db869e5 Add .gitignore 2024-12-21 09:38:44 +01:00
537b48dc22 deeptagger: flush batches
All checks were successful
Alpine 3.20 Success
Debian Bookworm Success
So that crashes do not disturb the output as much.
2024-12-14 22:56:26 +01:00
5 changed files with 181 additions and 33 deletions

11
.gitignore vendored Normal file
View File

@@ -0,0 +1,11 @@
/gallery
/initialize.go
/public/mithril.js
/gallery.cflags
/gallery.config
/gallery.creator
/gallery.creator.user
/gallery.cxxflags
/gallery.files
/gallery.includes

View File

@@ -315,6 +315,7 @@ run(std::vector<Magick::Image> &images, const Config &config,
}
}
}
fflush(stdout);
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

View File

@@ -23,6 +23,7 @@ CREATE TABLE IF NOT EXISTS node(
) STRICT;
CREATE INDEX IF NOT EXISTS node__sha1 ON node(sha1);
CREATE INDEX IF NOT EXISTS node__parent ON node(parent);
CREATE UNIQUE INDEX IF NOT EXISTS node__parent_name
ON node(IFNULL(parent, 0), name);

192
main.go
View File

@@ -94,10 +94,15 @@ func init() {
}
// openDB records directory as the gallery directory, opens the database
// file located inside it through the "sqlite3_custom" driver (with foreign
// keys enabled and a busy timeout set), and executes initializeSQL on it.
//
// It returns the error from opening the database, or else the error
// from executing initializeSQL.
func openDB(directory string) error {
	// Assigned exactly once, up front; a second, identical assignment
	// after sql.Open was redundant.
	galleryDirectory = directory

	var err error
	db, err = sql.Open("sqlite3_custom", "file:"+filepath.Join(directory,
		nameOfDB+"?_foreign_keys=1&_busy_timeout=1000"))
	if err != nil {
		return err
	}

	_, err = db.Exec(initializeSQL)
	return err
}
@@ -303,10 +308,6 @@ func cmdInit(fs *flag.FlagSet, args []string) error {
return err
}
if _, err := db.Exec(initializeSQL); err != nil {
return err
}
// XXX: There's technically no reason to keep images as symlinks,
// we might just keep absolute paths in the database as well.
if err := os.MkdirAll(
@@ -657,7 +658,9 @@ func getOrphanReplacement(webPath string) (*webOrphanImage, error) {
}
parent, err := idForDirectoryPath(tx, path[:len(path)-1], false)
if err != nil {
if errors.Is(err, sql.ErrNoRows) {
return nil, nil
} else if err != nil {
return nil, err
}
@@ -684,7 +687,8 @@ func getOrphans() (result []webOrphan, err error) {
FROM orphan AS o
JOIN image AS i ON o.sha1 = i.sha1
LEFT JOIN tag_assignment AS ta ON o.sha1 = ta.sha1
GROUP BY o.sha1`)
GROUP BY o.sha1
ORDER BY path`)
if err != nil {
return nil, err
}
@@ -841,15 +845,17 @@ type webSimilarImage struct {
func getSimilar(sha1 string, dhash int64, pixels int64, distance int) (
result []webSimilarImage, err error) {
// For distance ∈ {0, 1}, this query is quite inefficient.
// In exchange, it's generic.
//
// If there's a dhash, there should also be thumbnail dimensions,
// so not bothering with IFNULL on them.
rows, err := db.Query(`
SELECT sha1, width * height, IFNULL(thumbw, 0), IFNULL(thumbh, 0)
FROM image WHERE sha1 <> ? AND dhash IS NOT NULL
// If there's a dhash, there should also be thumbnail dimensions.
var rows *sql.Rows
common := `SELECT sha1, width * height, IFNULL(thumbw, 0), IFNULL(thumbh, 0)
FROM image WHERE sha1 <> ? AND `
if distance == 0 {
rows, err = db.Query(common+`dhash = ?`, sha1, dhash)
} else {
// This is generic, but quite inefficient for distance ∈ {0, 1}.
rows, err = db.Query(common+`dhash IS NOT NULL
AND hamming(dhash, ?) = ?`, sha1, dhash, distance)
}
if err != nil {
return nil, err
}
@@ -1286,6 +1292,9 @@ type syncContext struct {
stmtDisposeSub *sql.Stmt
stmtDisposeAll *sql.Stmt
// exclude specifies filesystem paths that should be seen as missing.
exclude *regexp.Regexp
// linked tracks which image hashes we've checked so far in the run.
linked map[string]struct{}
}
@@ -1694,6 +1703,12 @@ func syncDirectory(c *syncContext, dbParent int64, fsPath string) error {
fs = nil
}
if c.exclude != nil {
fs = slices.DeleteFunc(fs, func(f syncFile) bool {
return c.exclude.MatchString(filepath.Join(fsPath, f.fsName))
})
}
// Convert differences to a form more convenient for processing.
iDB, iFS, pairs := 0, 0, []syncPair{}
for iDB < len(db) && iFS < len(fs) {
@@ -1869,9 +1884,21 @@ const disposeCTE = `WITH RECURSIVE
HAVING count = total
)`
// excludeRE wraps a compiled regular expression so that it can be
// registered as a command line flag value (it provides String and Set).
type excludeRE struct{ re *regexp.Regexp }

// String renders the wrapped expression using default formatting.
func (e *excludeRE) String() string {
	return fmt.Sprint(e.re)
}

// Set compiles value and stores the outcome, returning any compilation
// error. When compilation fails, the stored expression becomes nil.
func (e *excludeRE) Set(value string) error {
	compiled, err := regexp.Compile(value)
	e.re = compiled
	return err
}
// cmdSync ensures the given (sub)roots are accurately reflected
// in the database.
func cmdSync(fs *flag.FlagSet, args []string) error {
var exclude excludeRE
fs.Var(&exclude, "exclude", "exclude paths matching regular expression")
fullpaths := fs.Bool("fullpaths", false, "don't basename arguments")
if err := fs.Parse(args); err != nil {
return err
@@ -1909,7 +1936,7 @@ func cmdSync(fs *flag.FlagSet, args []string) error {
}
c := syncContext{ctx: ctx, tx: tx, pb: newProgressBar(-1),
linked: make(map[string]struct{})}
exclude: exclude.re, linked: make(map[string]struct{})}
defer c.pb.Stop()
if c.stmtOrphan, err = c.tx.Prepare(disposeCTE + `
@@ -2005,6 +2032,88 @@ func cmdRemove(fs *flag.FlagSet, args []string) error {
return tx.Commit()
}
// --- Forgetting --------------------------------------------------------------
// cmdForget is for purging orphaned images from the database.
//
// The first positional argument is the gallery directory handed to openDB,
// all following ones are image SHA-1 hashes; at least one hash is required,
// otherwise errWrongUsage is returned.
//
// Hashes that are still referenced by a node, or that are not present
// in the image table at all, are logged and skipped. For the remaining ones,
// rows in tag_assignment, orphan and image are deleted, and the respective
// image and thumbnail files are removed from the filesystem.
func cmdForget(fs *flag.FlagSet, args []string) error {
	if err := fs.Parse(args); err != nil {
		return err
	}
	if fs.NArg() < 2 {
		return errWrongUsage
	}
	if err := openDB(fs.Arg(0)); err != nil {
		return err
	}

	tx, err := db.Begin()
	if err != nil {
		return err
	}
	defer tx.Rollback()

	// Creating a temporary database seems justifiable in this case.
	_, err = tx.Exec(
		`CREATE TEMPORARY TABLE forgotten (sha1 TEXT PRIMARY KEY)`)
	if err != nil {
		return err
	}

	stmt, err := tx.Prepare(`INSERT INTO forgotten (sha1) VALUES (?)`)
	if err != nil {
		return err
	}
	defer stmt.Close()
	for _, sha1 := range fs.Args()[1:] {
		if _, err := stmt.Exec(sha1); err != nil {
			return err
		}
	}

	// Weed out everything that must not be forgotten, and report it.
	rows, err := tx.Query(`DELETE FROM forgotten
		WHERE sha1 IN (SELECT sha1 FROM node)
		OR sha1 NOT IN (SELECT sha1 FROM image)
		RETURNING sha1`)
	if err != nil {
		return err
	}
	defer rows.Close()
	for rows.Next() {
		var sha1 string
		if err := rows.Scan(&sha1); err != nil {
			return err
		}
		log.Printf("not an orphan or not known at all: %s", sha1)
	}
	// Next also returns false on failure, so tell that apart from
	// a regular end of results before deleting anything.
	if err := rows.Err(); err != nil {
		return err
	}

	if _, err = tx.Exec(`
		DELETE FROM tag_assignment WHERE sha1 IN (SELECT sha1 FROM forgotten);
		DELETE FROM orphan WHERE sha1 IN (SELECT sha1 FROM forgotten);
		DELETE FROM image WHERE sha1 IN (SELECT sha1 FROM forgotten);
	`); err != nil {
		return err
	}

	// NOTE(review): files are removed before the transaction commits,
	// so a failed commit leaves database rows without their files.
	rows, err = tx.Query(`SELECT sha1 FROM forgotten`)
	if err != nil {
		return err
	}
	defer rows.Close()
	for rows.Next() {
		var sha1 string
		if err := rows.Scan(&sha1); err != nil {
			return err
		}

		// Removal failures are only reported, missing files are fine.
		if err := os.Remove(imagePath(sha1)); err != nil &&
			!os.IsNotExist(err) {
			log.Printf("%s", err)
		}
		if err := os.Remove(thumbPath(sha1)); err != nil &&
			!os.IsNotExist(err) {
			log.Printf("%s", err)
		}
	}
	// Do not commit when the listing of forgotten images was cut short.
	if err := rows.Err(); err != nil {
		return err
	}
	return tx.Commit()
}
// --- Tagging -----------------------------------------------------------------
// cmdTag mass imports tags from data passed on stdin as a TSV
@@ -2127,36 +2236,54 @@ func collectFileListing(root string) (paths []string, err error) {
return
}
func checkFiles(root, suffix string, hashes []string) (bool, []string, error) {
func checkFiles(gc bool,
root, suffix string, hashes []string) (bool, []string, error) {
db := hashesToFileListing(root, suffix, hashes)
fs, err := collectFileListing(root)
if err != nil {
return false, nil, err
}
iDB, iFS, ok, intersection := 0, 0, true, []string{}
// There are two legitimate cases of FS-only database files:
// 1. There is no code to unlink images at all
// (although sync should create orphan records for everything).
// 2. thumbnail: failures may result in an unreferenced garbage image.
ok := true
onlyDB := func(path string) {
ok = false
fmt.Printf("only in DB: %s\n", path)
}
onlyFS := func(path string) {
if !gc {
ok = false
fmt.Printf("only in FS: %s\n", path)
} else if err := os.Remove(path); err != nil {
ok = false
fmt.Printf("only in FS (removing failed): %s: %s\n", path, err)
} else {
fmt.Printf("only in FS (removing): %s\n", path)
}
}
iDB, iFS, intersection := 0, 0, []string{}
for iDB < len(db) && iFS < len(fs) {
if db[iDB] == fs[iFS] {
intersection = append(intersection, db[iDB])
iDB++
iFS++
} else if db[iDB] < fs[iFS] {
ok = false
fmt.Printf("only in DB: %s\n", db[iDB])
onlyDB(db[iDB])
iDB++
} else {
ok = false
fmt.Printf("only in FS: %s\n", fs[iFS])
onlyFS(fs[iFS])
iFS++
}
}
for _, path := range db[iDB:] {
ok = false
fmt.Printf("only in DB: %s\n", path)
onlyDB(path)
}
for _, path := range fs[iFS:] {
ok = false
fmt.Printf("only in FS: %s\n", path)
onlyFS(path)
}
return ok, intersection, nil
}
@@ -2204,6 +2331,7 @@ func checkHashes(paths []string) (bool, error) {
// cmdCheck carries out various database consistency checks.
func cmdCheck(fs *flag.FlagSet, args []string) error {
full := fs.Bool("full", false, "verify image hashes")
gc := fs.Bool("gc", false, "garbage collect database files")
if err := fs.Parse(args); err != nil {
return err
}
@@ -2240,13 +2368,13 @@ func cmdCheck(fs *flag.FlagSet, args []string) error {
// This somewhat duplicates {image,thumb}Path().
log.Println("checking SQL against filesystem")
okImages, intersection, err := checkFiles(
okImages, intersection, err := checkFiles(*gc,
filepath.Join(galleryDirectory, nameOfImageRoot), "", allSHA1)
if err != nil {
return err
}
okThumbs, _, err := checkFiles(
okThumbs, _, err := checkFiles(*gc,
filepath.Join(galleryDirectory, nameOfThumbRoot), ".webp", thumbSHA1)
if err != nil {
return err
@@ -2255,11 +2383,11 @@ func cmdCheck(fs *flag.FlagSet, args []string) error {
ok = false
}
log.Println("checking for dead symlinks")
log.Println("checking for dead symlinks (should become orphans on sync)")
for _, path := range intersection {
if _, err := os.Stat(path); err != nil {
ok = false
fmt.Printf("%s: %s\n", path, err)
fmt.Printf("%s: %s\n", path, err.(*os.PathError).Unwrap())
}
}
@@ -2597,6 +2725,7 @@ var commands = map[string]struct {
"tag": {cmdTag, "GD SPACE [DESCRIPTION]", "Import tags."},
"sync": {cmdSync, "GD ROOT...", "Synchronise with the filesystem."},
"remove": {cmdRemove, "GD PATH...", "Remove database subtrees."},
"forget": {cmdForget, "GD SHA1...", "Dispose of orphans."},
"check": {cmdCheck, "GD", "Run consistency checks."},
"thumbnail": {cmdThumbnail, "GD [SHA1...]", "Generate thumbnails."},
"dhash": {cmdDhash, "GD [SHA1...]", "Compute perceptual hashes."},
@@ -2660,6 +2789,9 @@ func main() {
// Note that the database object has a closing finalizer,
// we just additionally print any errors coming from there.
if db != nil {
if _, err := db.Exec(`PRAGMA optimize`); err != nil {
log.Println(err)
}
if err := db.Close(); err != nil {
log.Println(err)
}

View File

@@ -16,6 +16,9 @@ sha1duplicate=$sha1
cp $input/Test/dhash.png \
$input/Test/multiple-paths.png
gen -seed 15 -size 256x256 plasma:fractal \
$input/Test/excluded.png
gen -seed 20 -size 160x128 plasma:fractal \
-bordercolor transparent -border 64 \
$input/Test/transparent-wide.png
@@ -36,7 +39,7 @@ gen $input/Test/animation-small.gif \
$input/Test/video.mp4
./gallery init $target
./gallery sync $target $input "$@"
./gallery sync -exclude '/excluded[.]' $target $input "$@"
./gallery thumbnail $target
./gallery dhash $target
./gallery tag $target test "Test space" <<-END
@@ -47,7 +50,7 @@ END
# TODO: Test all the various possible sync transitions.
mv $input/Test $input/Plasma
./gallery sync $target $input
./gallery sync -exclude '/excluded[.]' $target $input
./gallery web $target :8080 &
web=$!