Compare commits

...

3 Commits

Author SHA1 Message Date
e895beadb7
Add a check option to garbage collect DB files
All checks were successful
Alpine 3.20 Success
Debian Bookworm Success
2024-12-21 12:18:54 +01:00
615af97043
Add a sync option to exclude paths by regexp 2024-12-21 11:12:00 +01:00
595db869e5
Add .gitignore 2024-12-21 09:38:44 +01:00
3 changed files with 71 additions and 17 deletions

11
.gitignore vendored Normal file
View File

@ -0,0 +1,11 @@
/gallery
/initialize.go
/public/mithril.js
/gallery.cflags
/gallery.config
/gallery.creator
/gallery.creator.user
/gallery.cxxflags
/gallery.files
/gallery.includes

70
main.go
View File

@ -1286,6 +1286,9 @@ type syncContext struct {
stmtDisposeSub *sql.Stmt
stmtDisposeAll *sql.Stmt
// exclude specifies filesystem paths that should be seen as missing.
exclude *regexp.Regexp
// linked tracks which image hashes we've checked so far in the run.
linked map[string]struct{}
}
@ -1694,6 +1697,12 @@ func syncDirectory(c *syncContext, dbParent int64, fsPath string) error {
fs = nil
}
if c.exclude != nil {
fs = slices.DeleteFunc(fs, func(f syncFile) bool {
return c.exclude.MatchString(filepath.Join(fsPath, f.fsName))
})
}
// Convert differences to a form more convenient for processing.
iDB, iFS, pairs := 0, 0, []syncPair{}
for iDB < len(db) && iFS < len(fs) {
@ -1869,9 +1878,21 @@ const disposeCTE = `WITH RECURSIVE
HAVING count = total
)`
// excludeRE adapts a compiled regular expression to the flag.Value
// interface, so that a pattern can be supplied as a command line option.
type excludeRE struct{ re *regexp.Regexp }

// String returns the source text of the pattern, or an empty string
// when no pattern has been set yet.
//
// The flag package may invoke String on a zero-valued receiver,
// including a nil pointer, so both the receiver and the wrapped
// expression are checked before being dereferenced.
func (re *excludeRE) String() string {
	if re == nil || re.re == nil {
		return ""
	}
	return re.re.String()
}

// Set compiles value and stores the result. On a compilation error,
// that error is returned and any previously stored pattern is kept.
func (re *excludeRE) Set(value string) error {
	compiled, err := regexp.Compile(value)
	if err != nil {
		return err
	}
	re.re = compiled
	return nil
}
// cmdSync ensures the given (sub)roots are accurately reflected
// in the database.
func cmdSync(fs *flag.FlagSet, args []string) error {
var exclude excludeRE
fs.Var(&exclude, "exclude", "exclude paths matching regular expression")
fullpaths := fs.Bool("fullpaths", false, "don't basename arguments")
if err := fs.Parse(args); err != nil {
return err
@ -1909,7 +1930,7 @@ func cmdSync(fs *flag.FlagSet, args []string) error {
}
c := syncContext{ctx: ctx, tx: tx, pb: newProgressBar(-1),
linked: make(map[string]struct{})}
exclude: exclude.re, linked: make(map[string]struct{})}
defer c.pb.Stop()
if c.stmtOrphan, err = c.tx.Prepare(disposeCTE + `
@ -2127,36 +2148,54 @@ func collectFileListing(root string) (paths []string, err error) {
return
}
// checkFiles compares the file listing expected from hashes (built by
// hashesToFileListing from root and suffix) with the listing that
// collectFileListing finds under root, and prints every path that exists
// on one side only to standard output.
//
// With gc set, files found only on the filesystem are removed rather than
// counted as inconsistencies; a removal that fails still counts as one.
//
// It returns whether the check passed, the paths present on both sides,
// and any error from collecting the filesystem listing.
func checkFiles(root, suffix string, hashes []string) (bool, []string, error) {
func checkFiles(gc bool,
root, suffix string, hashes []string) (bool, []string, error) {
db := hashesToFileListing(root, suffix, hashes)
fs, err := collectFileListing(root)
if err != nil {
return false, nil, err
}
iDB, iFS, ok, intersection := 0, 0, true, []string{}
// There are two legitimate cases of FS-only database files:
// 1. There is no code to unlink images at all
// (although sync should create orphan records for everything).
// 2. thumbnail: failures may result in an unreferenced garbage image.
ok := true
// onlyDB reports a path that the database expects but the filesystem lacks;
// this always fails the check.
onlyDB := func(path string) {
ok = false
fmt.Printf("only in DB: %s\n", path)
}
// onlyFS reports a file with no database counterpart. Without gc it fails
// the check; with gc the file is deleted, and only a failed deletion
// fails the check.
onlyFS := func(path string) {
if !gc {
ok = false
fmt.Printf("only in FS: %s\n", path)
} else if err := os.Remove(path); err != nil {
ok = false
fmt.Printf("only in FS (removing failed): %s: %s\n", path, err)
} else {
fmt.Printf("only in FS (removing): %s\n", path)
}
}
iDB, iFS, intersection := 0, 0, []string{}
// Walk both listings in lockstep, advancing whichever side holds the
// smaller path. This relies on db and fs sharing the same sort order,
// presumably ascending -- TODO confirm against the two listing helpers.
for iDB < len(db) && iFS < len(fs) {
if db[iDB] == fs[iFS] {
intersection = append(intersection, db[iDB])
iDB++
iFS++
} else if db[iDB] < fs[iFS] {
ok = false
fmt.Printf("only in DB: %s\n", db[iDB])
onlyDB(db[iDB])
iDB++
} else {
ok = false
fmt.Printf("only in FS: %s\n", fs[iFS])
onlyFS(fs[iFS])
iFS++
}
}
// Whatever remains on either side after the walk has no counterpart.
for _, path := range db[iDB:] {
ok = false
fmt.Printf("only in DB: %s\n", path)
onlyDB(path)
}
for _, path := range fs[iFS:] {
ok = false
fmt.Printf("only in FS: %s\n", path)
onlyFS(path)
}
return ok, intersection, nil
}
@ -2204,6 +2243,7 @@ func checkHashes(paths []string) (bool, error) {
// cmdCheck carries out various database consistency checks.
func cmdCheck(fs *flag.FlagSet, args []string) error {
full := fs.Bool("full", false, "verify image hashes")
gc := fs.Bool("gc", false, "garbage collect database files")
if err := fs.Parse(args); err != nil {
return err
}
@ -2240,13 +2280,13 @@ func cmdCheck(fs *flag.FlagSet, args []string) error {
// This somewhat duplicates {image,thumb}Path().
log.Println("checking SQL against filesystem")
okImages, intersection, err := checkFiles(
okImages, intersection, err := checkFiles(*gc,
filepath.Join(galleryDirectory, nameOfImageRoot), "", allSHA1)
if err != nil {
return err
}
okThumbs, _, err := checkFiles(
okThumbs, _, err := checkFiles(*gc,
filepath.Join(galleryDirectory, nameOfThumbRoot), ".webp", thumbSHA1)
if err != nil {
return err
@ -2255,11 +2295,11 @@ func cmdCheck(fs *flag.FlagSet, args []string) error {
ok = false
}
log.Println("checking for dead symlinks")
log.Println("checking for dead symlinks (should become orphans on sync)")
for _, path := range intersection {
if _, err := os.Stat(path); err != nil {
ok = false
fmt.Printf("%s: %s\n", path, err)
fmt.Printf("%s: %s\n", path, err.(*os.PathError).Unwrap())
}
}

View File

@ -16,6 +16,9 @@ sha1duplicate=$sha1
cp $input/Test/dhash.png \
$input/Test/multiple-paths.png
gen -seed 15 -size 256x256 plasma:fractal \
$input/Test/excluded.png
gen -seed 20 -size 160x128 plasma:fractal \
-bordercolor transparent -border 64 \
$input/Test/transparent-wide.png
@ -36,7 +39,7 @@ gen $input/Test/animation-small.gif \
$input/Test/video.mp4
./gallery init $target
./gallery sync $target $input "$@"
./gallery sync -exclude '/excluded[.]' $target $input "$@"
./gallery thumbnail $target
./gallery dhash $target
./gallery tag $target test "Test space" <<-END
@ -47,7 +50,7 @@ END
# TODO: Test all the various possible sync transitions.
mv $input/Test $input/Plasma
./gallery sync $target $input
./gallery sync -exclude '/excluded[.]' $target $input
./gallery web $target :8080 &
web=$!