Compare commits

..

20 Commits

Author SHA1 Message Date
181ab5a8e7 Optimize /api/similar
All checks were successful
Alpine 3.20 Success
Debian Bookworm Success
2024-12-29 18:10:15 +01:00
fd192310c7 Add a forget function to dispose of orphans
All checks were successful
Alpine 3.20 Success
Debian Bookworm Success
Previously, there was no way of removing images from the database.
2024-12-29 16:22:50 +01:00
b73e0b4622 Order orphans by path
All checks were successful
Alpine 3.20 Success
Debian Bookworm Success
It costs more cycles, but the SHA-1 they got implicitly ordered by
is pseudo-random.
2024-12-29 14:47:11 +01:00
0530c5d95f Fix /api/orphans with removed parent nodes 2024-12-29 14:17:07 +01:00
ce2e58b6bc Fix extremely slow removals 2024-12-29 13:41:07 +01:00
ca462ac005 Remember to optimize the database 2024-12-29 12:32:44 +01:00
e895beadb7 Add a check option to garbage collect DB files
All checks were successful
Alpine 3.20 Success
Debian Bookworm Success
2024-12-21 12:18:54 +01:00
615af97043 Add a sync option to exclude paths by regexp 2024-12-21 11:12:00 +01:00
595db869e5 Add .gitignore 2024-12-21 09:38:44 +01:00
537b48dc22 deeptagger: flush batches
All checks were successful
Alpine 3.20 Success
Debian Bookworm Success
So that crashes do not disturb the output as much.
2024-12-14 22:56:26 +01:00
2c09745a9f deeptagger: fix README.adoc instructions
All checks were successful
Alpine 3.20 Success
Debian Bookworm Success
The images are under normal circumstances all symlinks.

What we're actually trying to express is `-not -type d`,
however that is not completely portable.
2024-12-08 22:16:11 +01:00
beb7c5e337 gallery: create DB directory in initialization
So that README.adoc instructions actually work.
2024-12-08 22:09:11 +01:00
19705527a0 Cleanup 2024-02-13 15:44:42 +01:00
9e22bd0e20 gallery: improve the README
All checks were successful
Alpine 3.19 Success
Debian Bookworm Success
2024-01-27 18:30:57 +01:00
d27d8655bb gallery: make it reverse proxy friendly 2024-01-27 18:09:48 +01:00
6d75ec60bf gallery: go back to ImageMagick v6
To cater to Debian.
2024-01-27 18:09:07 +01:00
84a94933b3 gallery: make it possible to collapse tag spaces 2024-01-23 11:30:55 +01:00
5e0e9f8a42 gallery: clean up, search in a transaction 2024-01-22 19:52:37 +01:00
083739fd4e gallery: implement AND/NOT for tag search 2024-01-22 19:52:35 +01:00
4f174972e3 gallery: move out a query from CTE 2024-01-22 19:51:13 +01:00
10 changed files with 415 additions and 141 deletions

11
.gitignore vendored Normal file
View File

@@ -0,0 +1,11 @@
/gallery
/initialize.go
/public/mithril.js
/gallery.cflags
/gallery.config
/gallery.creator
/gallery.creator.user
/gallery.cxxflags
/gallery.files
/gallery.includes

View File

@@ -8,10 +8,32 @@ and query your collections in various ways.
All media is content-addressed by its SHA-1 hash value, and at your option
also perceptually hashed. Duplicate search is an essential feature.
Prerequisites: Go, ImageMagick v7, xdg-utils
The gallery is designed for simplicity, and easy interoperability.
sqlite3, curl, jq, and the filesystem will take you a long way.
Prerequisites: Go, ImageMagick, xdg-utils
ImageMagick v7 is preferred, as it runs out of memory less often.
Getting it to work
------------------
# apt install build-essential git golang imagemagick xdg-utils
$ git clone https://git.janouch.name/p/gallery.git
$ cd gallery
$ make
$ ./gallery init G
$ ./gallery sync G ~/Pictures
$ ./gallery thumbnail G # parallelized, with memory limits
$ ./gallery -threads 1 thumbnail G # one thread only gets more memory
$ ./gallery dhash G
$ ./gallery web G :8080
The intended mode of use is running daily automated sync/thumbnail/dhash/tag
batches in a cron job, or from a system timer. See _test.sh_ for usage hints.
batches in a cron job, or from a systemd timer.
The _web_ command needs to see the _public_ directory,
and is friendly to reverse proxying.
Demo
----
https://holedigging.club/gallery/

View File

@@ -53,7 +53,7 @@ Tagging galleries
The appropriate invocation depends on your machine, and the chosen model.
Unless you have a powerful machine, or use a fast model, it may take forever.
$ find "$GALLERY/images" -type f \
$ find "$GALLERY/images" -type l \
| build/deeptagger --pipe -b 16 -t 0.5 \
models/ml_caformer_m36_dec-5-97527.model \
| sed 's|[^\t]*/||' \

View File

@@ -28,11 +28,9 @@ run() {
for model in models/*.model
do
name=$(sed -n 's/^name=//p' "$model")
run "" 1 "$model" "$@"
run "" 4 "$model" "$@"
run "" 16 "$model" "$@"
run --cpu 1 "$model" "$@"
run --cpu 4 "$model" "$@"
run --cpu 16 "$model" "$@"
for batch in 1 4 16
do
run "" $batch "$model" "$@"
run --cpu $batch "$model" "$@"
done
done

View File

@@ -315,6 +315,7 @@ run(std::vector<Magick::Image> &images, const Config &config,
}
}
}
fflush(stdout);
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

View File

@@ -23,6 +23,7 @@ CREATE TABLE IF NOT EXISTS node(
) STRICT;
CREATE INDEX IF NOT EXISTS node__sha1 ON node(sha1);
CREATE INDEX IF NOT EXISTS node__parent ON node(parent);
CREATE UNIQUE INDEX IF NOT EXISTS node__parent_name
ON node(IFNULL(parent, 0), name);
@@ -76,7 +77,7 @@ CREATE TABLE IF NOT EXISTS tag_space(
id INTEGER NOT NULL,
name TEXT NOT NULL,
description TEXT,
CHECK (name NOT LIKE '%:%'),
CHECK (name NOT LIKE '%:%' AND name NOT LIKE '-%'),
PRIMARY KEY (id)
) STRICT;

449
main.go
View File

@@ -62,19 +62,47 @@ func hammingDistance(a, b int64) int {
return bits.OnesCount64(uint64(a) ^ uint64(b))
}
// productAggregator is a running product of float64 values. It provides
// the Step/Done pair that lets it be registered as the SQL aggregate
// function "product".
type productAggregator float64

// Step folds one more value into the running product.
func (pa *productAggregator) Step(v float64) {
	*pa *= productAggregator(v)
}

// Done reports the product of all values passed to Step so far.
func (pa *productAggregator) Done() float64 {
	total := *pa
	return float64(total)
}

// newProductAggregator returns a fresh aggregator seeded with 1,
// the multiplicative identity.
func newProductAggregator() *productAggregator {
	pa := new(productAggregator)
	*pa = 1
	return pa
}
func init() {
sql.Register("sqlite3_custom", &sqlite3.SQLiteDriver{
ConnectHook: func(conn *sqlite3.SQLiteConn) error {
return conn.RegisterFunc("hamming", hammingDistance, true /*pure*/)
if err := conn.RegisterFunc(
"hamming", hammingDistance, true /*pure*/); err != nil {
return err
}
if err := conn.RegisterAggregator(
"product", newProductAggregator, true /*pure*/); err != nil {
return err
}
return nil
},
})
}
func openDB(directory string) error {
galleryDirectory = directory
var err error
db, err = sql.Open("sqlite3_custom", "file:"+filepath.Join(directory,
nameOfDB+"?_foreign_keys=1&_busy_timeout=1000"))
galleryDirectory = directory
if err != nil {
return err
}
_, err = db.Exec(initializeSQL)
return err
}
@@ -273,11 +301,10 @@ func cmdInit(fs *flag.FlagSet, args []string) error {
if fs.NArg() != 1 {
return errWrongUsage
}
if err := openDB(fs.Arg(0)); err != nil {
if err := os.MkdirAll(fs.Arg(0), 0755); err != nil {
return err
}
if _, err := db.Exec(initializeSQL); err != nil {
if err := openDB(fs.Arg(0)); err != nil {
return err
}
@@ -294,49 +321,7 @@ func cmdInit(fs *flag.FlagSet, args []string) error {
return nil
}
// --- Web ---------------------------------------------------------------------
// hashRE matches a URL path that ends in a component made of exactly
// 40 lowercase hexadecimal digits, and captures that component.
var hashRE = regexp.MustCompile(`^/.*?/([0-9a-f]{40})$`)
// staticHandler receives every request whose path is not "/"
// (see handleRequest). NOTE(review): it is assigned outside this view.
var staticHandler http.Handler
// page is the HTML shell of the web application; it merely pulls in
// the stylesheet and the scripts, using relative URLs.
var page = template.Must(template.New("/").Parse(`<!DOCTYPE html><html><head>
<title>Gallery</title>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel=stylesheet href=style.css>
</head><body>
<noscript>This is a web application, and requires Javascript.</noscript>
<script src=mithril.js></script>
<script src=gallery.js></script>
</body></html>`))
func handleRequest(w http.ResponseWriter, r *http.Request) {
if r.URL.Path != "/" {
staticHandler.ServeHTTP(w, r)
return
}
if err := page.Execute(w, nil); err != nil {
log.Println(err)
}
}
// handleImages serves the file at imagePath for the hash that ends
// the request path, responding with 404 if the path doesn't match hashRE.
func handleImages(w http.ResponseWriter, r *http.Request) {
	match := hashRE.FindStringSubmatch(r.URL.Path)
	if match == nil {
		http.NotFound(w, r)
		return
	}
	http.ServeFile(w, r, imagePath(match[1]))
}
// handleThumbs serves the file at thumbPath for the hash that ends
// the request path, responding with 404 if the path doesn't match hashRE.
func handleThumbs(w http.ResponseWriter, r *http.Request) {
	match := hashRE.FindStringSubmatch(r.URL.Path)
	if match == nil {
		http.NotFound(w, r)
		return
	}
	http.ServeFile(w, r, thumbPath(match[1]))
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// --- API: Browse -------------------------------------------------------------
func getSubdirectories(tx *sql.Tx, parent int64) (names []string, err error) {
return dbCollectStrings(`SELECT name FROM node
@@ -416,7 +401,7 @@ func handleAPIBrowse(w http.ResponseWriter, r *http.Request) {
}
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// --- API: Tags ---------------------------------------------------------------
type webTagNamespace struct {
Description string `json:"description"`
@@ -502,7 +487,7 @@ func handleAPITags(w http.ResponseWriter, r *http.Request) {
}
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// --- API: Duplicates ---------------------------------------------------------
type webDuplicateImage struct {
SHA1 string `json:"sha1"`
@@ -645,7 +630,7 @@ func handleAPIDuplicates(w http.ResponseWriter, r *http.Request) {
}
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// --- API: Orphans ------------------------------------------------------------
type webOrphanImage struct {
SHA1 string `json:"sha1"`
@@ -673,7 +658,9 @@ func getOrphanReplacement(webPath string) (*webOrphanImage, error) {
}
parent, err := idForDirectoryPath(tx, path[:len(path)-1], false)
if err != nil {
if errors.Is(err, sql.ErrNoRows) {
return nil, nil
} else if err != nil {
return nil, err
}
@@ -700,7 +687,8 @@ func getOrphans() (result []webOrphan, err error) {
FROM orphan AS o
JOIN image AS i ON o.sha1 = i.sha1
LEFT JOIN tag_assignment AS ta ON o.sha1 = ta.sha1
GROUP BY o.sha1`)
GROUP BY o.sha1
ORDER BY path`)
if err != nil {
return nil, err
}
@@ -742,7 +730,7 @@ func handleAPIOrphans(w http.ResponseWriter, r *http.Request) {
}
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// --- API: Image view ---------------------------------------------------------
func getImageDimensions(sha1 string) (w int64, h int64, err error) {
err = db.QueryRow(`SELECT width, height FROM image WHERE sha1 = ?`,
@@ -845,7 +833,7 @@ func handleAPIInfo(w http.ResponseWriter, r *http.Request) {
}
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// --- API: Image similar ------------------------------------------------------
type webSimilarImage struct {
SHA1 string `json:"sha1"`
@@ -857,15 +845,17 @@ type webSimilarImage struct {
func getSimilar(sha1 string, dhash int64, pixels int64, distance int) (
result []webSimilarImage, err error) {
// For distance ∈ {0, 1}, this query is quite inefficient.
// In exchange, it's generic.
//
// If there's a dhash, there should also be thumbnail dimensions,
// so not bothering with IFNULL on them.
rows, err := db.Query(`
SELECT sha1, width * height, IFNULL(thumbw, 0), IFNULL(thumbh, 0)
FROM image WHERE sha1 <> ? AND dhash IS NOT NULL
AND hamming(dhash, ?) = ?`, sha1, dhash, distance)
// If there's a dhash, there should also be thumbnail dimensions.
var rows *sql.Rows
common := `SELECT sha1, width * height, IFNULL(thumbw, 0), IFNULL(thumbh, 0)
FROM image WHERE sha1 <> ? AND `
if distance == 0 {
rows, err = db.Query(common+`dhash = ?`, sha1, dhash)
} else {
// This is generic, but quite inefficient for distance ∈ {0, 1}.
rows, err = db.Query(common+`dhash IS NOT NULL
AND hamming(dhash, ?) = ?`, sha1, dhash, distance)
}
if err != nil {
return nil, err
}
@@ -955,25 +945,90 @@ func handleAPISimilar(w http.ResponseWriter, r *http.Request) {
}
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// --- API: Search -------------------------------------------------------------
// The SQL building is the most miserable part of the whole program.
// NOTE: AND will mean MULTIPLY(IFNULL(ta.weight, 0)) per SHA1.
const searchCTE = `WITH
const searchCTE1 = `WITH
matches(sha1, thumbw, thumbh, score) AS (
SELECT i.sha1, i.thumbw, i.thumbh, ta.weight AS score
FROM tag_assignment AS ta
JOIN image AS i ON i.sha1 = ta.sha1
WHERE ta.tag = ?
),
supertags(tag, space, name) AS (
SELECT DISTINCT ta.tag, ts.name, t.name
FROM tag_assignment AS ta
JOIN matches AS m ON m.sha1 = ta.sha1
JOIN tag AS t ON ta.tag = t.id
JOIN tag_space AS ts ON ts.id = t.space
WHERE ta.tag = %d
)
`
const searchCTEMulti = `WITH
positive(tag) AS (VALUES %s),
filtered(sha1) AS (%s),
matches(sha1, thumbw, thumbh, score) AS (
SELECT i.sha1, i.thumbw, i.thumbh,
product(IFNULL(ta.weight, 0)) AS score
FROM image AS i, positive AS p
JOIN filtered AS c ON i.sha1 = c.sha1
LEFT JOIN tag_assignment AS ta ON ta.sha1 = i.sha1 AND ta.tag = p.tag
GROUP BY i.sha1
)
`
// searchQueryToCTE translates a search query into the text of a WITH clause
// that defines a "matches" CTE, for callers to append their SELECT to.
//
// The query is a list of words separated by spaces, each of the form
// [-]SPACE:TAG. Words starting with a dash exclude images carrying that tag,
// all other words must be present on an image for it to match.
//
// Looking up an unknown tag produces an error that wraps sql.ErrNoRows
// and names the offending word. A query without any positive word
// is rejected as being too wide.
func searchQueryToCTE(tx *sql.Tx, query string) (string, error) {
	var positive, negative []int64
	for _, word := range strings.Split(query, " ") {
		if word == "" {
			continue
		}

		space, tag, _ := strings.Cut(word, ":")

		negated := false
		if strings.HasPrefix(space, "-") {
			space = space[1:]
			negated = true
		}

		var tagID int64
		err := tx.QueryRow(`
			SELECT t.id FROM tag AS t
			JOIN tag_space AS ts ON t.space = ts.id
			WHERE ts.name = ? AND t.name = ?`, space, tag).Scan(&tagID)
		if err != nil {
			// Keep the cause wrapped, so that errors.Is() can still
			// tell a missing tag apart from a database failure.
			return "", fmt.Errorf("tag %q: %w", word, err)
		}

		if negated {
			negative = append(negative, tagID)
		} else {
			positive = append(positive, tagID)
		}
	}

	// Don't return most of the database, and simplify the following builder.
	if len(positive) == 0 {
		return "", errors.New("search is too wide")
	}

	// Optimise single tag searches.
	if len(positive) == 1 && len(negative) == 0 {
		return fmt.Sprintf(searchCTE1, positive[0]), nil
	}

	// Tag IDs are integers coming from the database,
	// so formatting them straight into the SQL text is safe.
	const selectTagged = `SELECT sha1 FROM tag_assignment WHERE tag = %d`

	var values, filtered strings.Builder
	for i, tagID := range positive {
		if i > 0 {
			values.WriteString(`, `)
			filtered.WriteString(" INTERSECT\n\t\t\t")
		}
		fmt.Fprintf(&values, `(%d)`, tagID)
		fmt.Fprintf(&filtered, selectTagged, tagID)
	}
	for _, tagID := range negative {
		filtered.WriteString(" EXCEPT\n\t\t\t")
		fmt.Fprintf(&filtered, selectTagged, tagID)
	}
	return fmt.Sprintf(searchCTEMulti, values.String(), filtered.String()), nil
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
type webTagMatch struct {
SHA1 string `json:"sha1"`
ThumbW int64 `json:"thumbW"`
@@ -981,10 +1036,10 @@ type webTagMatch struct {
Score float32 `json:"score"`
}
func getTagMatches(tag int64) (matches []webTagMatch, err error) {
rows, err := db.Query(searchCTE+`
func getTagMatches(tx *sql.Tx, cte string) (matches []webTagMatch, err error) {
rows, err := tx.Query(cte + `
SELECT sha1, IFNULL(thumbw, 0), IFNULL(thumbh, 0), score
FROM matches`, tag)
FROM matches`)
if err != nil {
return nil, err
}
@@ -1008,9 +1063,14 @@ type webTagSupertag struct {
score float32
}
func getTagSupertags(tag int64) (result map[int64]*webTagSupertag, err error) {
rows, err := db.Query(searchCTE+`
SELECT tag, space, name FROM supertags`, tag)
func getTagSupertags(tx *sql.Tx, cte string) (
result map[int64]*webTagSupertag, err error) {
rows, err := tx.Query(cte + `
SELECT DISTINCT ta.tag, ts.name, t.name
FROM tag_assignment AS ta
JOIN matches AS m ON m.sha1 = ta.sha1
JOIN tag AS t ON ta.tag = t.id
JOIN tag_space AS ts ON ts.id = t.space`)
if err != nil {
return nil, err
}
@@ -1035,18 +1095,18 @@ type webTagRelated struct {
Score float32 `json:"score"`
}
func getTagRelated(tag int64, matches int) (
func getTagRelated(tx *sql.Tx, cte string, matches int) (
result map[string][]webTagRelated, err error) {
// Not sure if this level of efficiency is achievable directly in SQL.
supertags, err := getTagSupertags(tag)
supertags, err := getTagSupertags(tx, cte)
if err != nil {
return nil, err
}
rows, err := db.Query(searchCTE+`
rows, err := tx.Query(cte + `
SELECT ta.tag, ta.weight
FROM tag_assignment AS ta
JOIN matches AS m ON m.sha1 = ta.sha1`, tag)
JOIN matches AS m ON m.sha1 = ta.sha1`)
if err != nil {
return nil, err
}
@@ -1087,13 +1147,14 @@ func handleAPISearch(w http.ResponseWriter, r *http.Request) {
Related map[string][]webTagRelated `json:"related"`
}
space, tag, _ := strings.Cut(params.Query, ":")
tx, err := db.Begin()
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
defer tx.Rollback()
var tagID int64
err := db.QueryRow(`
SELECT t.id FROM tag AS t
JOIN tag_space AS ts ON t.space = ts.id
WHERE ts.name = ? AND t.name = ?`, space, tag).Scan(&tagID)
cte, err := searchQueryToCTE(tx, params.Query)
if errors.Is(err, sql.ErrNoRows) {
http.Error(w, err.Error(), http.StatusNotFound)
return
@@ -1102,11 +1163,11 @@ func handleAPISearch(w http.ResponseWriter, r *http.Request) {
return
}
if result.Matches, err = getTagMatches(tagID); err != nil {
if result.Matches, err = getTagMatches(tx, cte); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
if result.Related, err = getTagRelated(tagID,
if result.Related, err = getTagRelated(tx, cte,
len(result.Matches)); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
@@ -1117,7 +1178,47 @@ func handleAPISearch(w http.ResponseWriter, r *http.Request) {
}
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// --- Web ---------------------------------------------------------------------
// hashRE matches a URL path that ends in a component made of exactly
// 40 lowercase hexadecimal digits, and captures that component.
var hashRE = regexp.MustCompile(`^/.*?/([0-9a-f]{40})$`)
// staticHandler receives every request whose path is not "/"
// (see handleRequest). NOTE(review): it is assigned outside this view.
var staticHandler http.Handler
// page is the HTML shell of the web application; it merely pulls in
// the stylesheet and the scripts, using relative URLs.
var page = template.Must(template.New("/").Parse(`<!DOCTYPE html><html><head>
<title>Gallery</title>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel=stylesheet href=style.css>
</head><body>
<noscript>This is a web application, and requires Javascript.</noscript>
<script src=mithril.js></script>
<script src=gallery.js></script>
</body></html>`))
func handleRequest(w http.ResponseWriter, r *http.Request) {
if r.URL.Path != "/" {
staticHandler.ServeHTTP(w, r)
return
}
if err := page.Execute(w, nil); err != nil {
log.Println(err)
}
}
// handleImages serves the file at imagePath for the hash that ends
// the request path, responding with 404 if the path doesn't match hashRE.
func handleImages(w http.ResponseWriter, r *http.Request) {
	match := hashRE.FindStringSubmatch(r.URL.Path)
	if match == nil {
		http.NotFound(w, r)
		return
	}
	http.ServeFile(w, r, imagePath(match[1]))
}
// handleThumbs serves the file at thumbPath for the hash that ends
// the request path, responding with 404 if the path doesn't match hashRE.
func handleThumbs(w http.ResponseWriter, r *http.Request) {
	match := hashRE.FindStringSubmatch(r.URL.Path)
	if match == nil {
		http.NotFound(w, r)
		return
	}
	http.ServeFile(w, r, thumbPath(match[1]))
}
// cmdWeb runs a web UI against GD on ADDRESS.
func cmdWeb(fs *flag.FlagSet, args []string) error {
@@ -1191,6 +1292,9 @@ type syncContext struct {
stmtDisposeSub *sql.Stmt
stmtDisposeAll *sql.Stmt
// exclude specifies filesystem paths that should be seen as missing.
exclude *regexp.Regexp
// linked tracks which image hashes we've checked so far in the run.
linked map[string]struct{}
}
@@ -1285,7 +1389,7 @@ func syncIsImage(path string) (bool, error) {
}
func syncPingImage(path string) (int, int, error) {
out, err := exec.Command("magick", "identify", "-limit", "thread", "1",
out, err := exec.Command("identify", "-limit", "thread", "1",
"-ping", "-format", "%w %h", path+"[0]").Output()
if err != nil {
return 0, 0, err
@@ -1599,6 +1703,12 @@ func syncDirectory(c *syncContext, dbParent int64, fsPath string) error {
fs = nil
}
if c.exclude != nil {
fs = slices.DeleteFunc(fs, func(f syncFile) bool {
return c.exclude.MatchString(filepath.Join(fsPath, f.fsName))
})
}
// Convert differences to a form more convenient for processing.
iDB, iFS, pairs := 0, 0, []syncPair{}
for iDB < len(db) && iFS < len(fs) {
@@ -1774,9 +1884,21 @@ const disposeCTE = `WITH RECURSIVE
HAVING count = total
)`
// excludeRE makes a regular expression usable as a command line flag
// value: it provides the String and Set methods that fs.Var expects.
// The wrapped pattern is nil until Set succeeds for the first time.
type excludeRE struct{ re *regexp.Regexp }

// String returns the source text of the pattern,
// or an empty string when no pattern has been set.
func (re *excludeRE) String() string {
	// Check explicitly, rather than have a nil pattern dereferenced.
	if re == nil || re.re == nil {
		return ""
	}
	return re.re.String()
}

// Set compiles value and stores the result. When compilation fails,
// the error is returned and any previously set pattern is kept.
func (re *excludeRE) Set(value string) error {
	compiled, err := regexp.Compile(value)
	if err != nil {
		return err
	}
	re.re = compiled
	return nil
}
// cmdSync ensures the given (sub)roots are accurately reflected
// in the database.
func cmdSync(fs *flag.FlagSet, args []string) error {
var exclude excludeRE
fs.Var(&exclude, "exclude", "exclude paths matching regular expression")
fullpaths := fs.Bool("fullpaths", false, "don't basename arguments")
if err := fs.Parse(args); err != nil {
return err
@@ -1814,7 +1936,7 @@ func cmdSync(fs *flag.FlagSet, args []string) error {
}
c := syncContext{ctx: ctx, tx: tx, pb: newProgressBar(-1),
linked: make(map[string]struct{})}
exclude: exclude.re, linked: make(map[string]struct{})}
defer c.pb.Stop()
if c.stmtOrphan, err = c.tx.Prepare(disposeCTE + `
@@ -1910,6 +2032,88 @@ func cmdRemove(fs *flag.FlagSet, args []string) error {
return tx.Commit()
}
// --- Forgetting --------------------------------------------------------------
// cmdForget is for purging orphaned images from the database.
//
// The arguments are the gallery directory, followed by at least one SHA-1.
// Hashes that are still referenced by a node, or that have no image record,
// are logged and left alone. For the remaining ones, tag assignments,
// orphan records and image records are deleted within one transaction,
// and once that is committed, the image and thumbnail files are unlinked.
// Failures to unlink are logged, but don't fail the command.
func cmdForget(fs *flag.FlagSet, args []string) error {
	if err := fs.Parse(args); err != nil {
		return err
	}
	if fs.NArg() < 2 {
		return errWrongUsage
	}
	if err := openDB(fs.Arg(0)); err != nil {
		return err
	}

	tx, err := db.Begin()
	if err != nil {
		return err
	}
	defer tx.Rollback()

	// Creating a temporary table seems justifiable in this case.
	_, err = tx.Exec(
		`CREATE TEMPORARY TABLE forgotten (sha1 TEXT PRIMARY KEY)`)
	if err != nil {
		return err
	}

	// Repeated arguments are harmless, don't trip over the primary key.
	stmt, err := tx.Prepare(
		`INSERT OR IGNORE INTO forgotten (sha1) VALUES (?)`)
	if err != nil {
		return err
	}
	defer stmt.Close()
	for _, sha1 := range fs.Args()[1:] {
		if _, err := stmt.Exec(sha1); err != nil {
			return err
		}
	}

	// Weed out anything that must not, or cannot be forgotten.
	rows, err := tx.Query(`DELETE FROM forgotten
		WHERE sha1 IN (SELECT sha1 FROM node)
		OR sha1 NOT IN (SELECT sha1 FROM image)
		RETURNING sha1`)
	if err != nil {
		return err
	}
	defer rows.Close()
	for rows.Next() {
		var sha1 string
		if err := rows.Scan(&sha1); err != nil {
			return err
		}
		log.Printf("not an orphan or not known at all: %s", sha1)
	}
	if err := rows.Err(); err != nil {
		return err
	}

	if _, err = tx.Exec(`
		DELETE FROM tag_assignment WHERE sha1 IN (SELECT sha1 FROM forgotten);
		DELETE FROM orphan WHERE sha1 IN (SELECT sha1 FROM forgotten);
		DELETE FROM image WHERE sha1 IN (SELECT sha1 FROM forgotten);
	`); err != nil {
		return err
	}

	rows, err = tx.Query(`SELECT sha1 FROM forgotten`)
	if err != nil {
		return err
	}
	defer rows.Close()

	var forgotten []string
	for rows.Next() {
		var sha1 string
		if err := rows.Scan(&sha1); err != nil {
			return err
		}
		forgotten = append(forgotten, sha1)
	}
	if err := rows.Err(); err != nil {
		return err
	}

	// Unlinking can't be rolled back, so only do it once the database
	// has durably let go of the records. Should it fail afterwards,
	// all that is left behind are unreferenced files.
	if err := tx.Commit(); err != nil {
		return err
	}
	for _, sha1 := range forgotten {
		if err := os.Remove(imagePath(sha1)); err != nil &&
			!os.IsNotExist(err) {
			log.Printf("%s", err)
		}
		if err := os.Remove(thumbPath(sha1)); err != nil &&
			!os.IsNotExist(err) {
			log.Printf("%s", err)
		}
	}
	return nil
}
// --- Tagging -----------------------------------------------------------------
// cmdTag mass imports tags from data passed on stdin as a TSV
@@ -2032,36 +2236,54 @@ func collectFileListing(root string) (paths []string, err error) {
return
}
func checkFiles(root, suffix string, hashes []string) (bool, []string, error) {
func checkFiles(gc bool,
root, suffix string, hashes []string) (bool, []string, error) {
db := hashesToFileListing(root, suffix, hashes)
fs, err := collectFileListing(root)
if err != nil {
return false, nil, err
}
iDB, iFS, ok, intersection := 0, 0, true, []string{}
// There are two legitimate cases of FS-only database files:
// 1. There is no code to unlink images at all
// (although sync should create orphan records for everything).
// 2. thumbnail: failures may result in an unreferenced garbage image.
ok := true
onlyDB := func(path string) {
ok = false
fmt.Printf("only in DB: %s\n", path)
}
onlyFS := func(path string) {
if !gc {
ok = false
fmt.Printf("only in FS: %s\n", path)
} else if err := os.Remove(path); err != nil {
ok = false
fmt.Printf("only in FS (removing failed): %s: %s\n", path, err)
} else {
fmt.Printf("only in FS (removing): %s\n", path)
}
}
iDB, iFS, intersection := 0, 0, []string{}
for iDB < len(db) && iFS < len(fs) {
if db[iDB] == fs[iFS] {
intersection = append(intersection, db[iDB])
iDB++
iFS++
} else if db[iDB] < fs[iFS] {
ok = false
fmt.Printf("only in DB: %s\n", db[iDB])
onlyDB(db[iDB])
iDB++
} else {
ok = false
fmt.Printf("only in FS: %s\n", fs[iFS])
onlyFS(fs[iFS])
iFS++
}
}
for _, path := range db[iDB:] {
ok = false
fmt.Printf("only in DB: %s\n", path)
onlyDB(path)
}
for _, path := range fs[iFS:] {
ok = false
fmt.Printf("only in FS: %s\n", path)
onlyFS(path)
}
return ok, intersection, nil
}
@@ -2109,6 +2331,7 @@ func checkHashes(paths []string) (bool, error) {
// cmdCheck carries out various database consistency checks.
func cmdCheck(fs *flag.FlagSet, args []string) error {
full := fs.Bool("full", false, "verify image hashes")
gc := fs.Bool("gc", false, "garbage collect database files")
if err := fs.Parse(args); err != nil {
return err
}
@@ -2145,13 +2368,13 @@ func cmdCheck(fs *flag.FlagSet, args []string) error {
// This somewhat duplicates {image,thumb}Path().
log.Println("checking SQL against filesystem")
okImages, intersection, err := checkFiles(
okImages, intersection, err := checkFiles(*gc,
filepath.Join(galleryDirectory, nameOfImageRoot), "", allSHA1)
if err != nil {
return err
}
okThumbs, _, err := checkFiles(
okThumbs, _, err := checkFiles(*gc,
filepath.Join(galleryDirectory, nameOfThumbRoot), ".webp", thumbSHA1)
if err != nil {
return err
@@ -2160,11 +2383,11 @@ func cmdCheck(fs *flag.FlagSet, args []string) error {
ok = false
}
log.Println("checking for dead symlinks")
log.Println("checking for dead symlinks (should become orphans on sync)")
for _, path := range intersection {
if _, err := os.Stat(path); err != nil {
ok = false
fmt.Printf("%s: %s\n", path, err)
fmt.Printf("%s: %s\n", path, err.(*os.PathError).Unwrap())
}
}
@@ -2229,7 +2452,7 @@ func makeThumbnail(load bool, pathImage, pathThumb string) (
//
// TODO: See if we can optimize resulting WebP animations.
// (Do -layers optimize* apply to this format at all?)
cmd := exec.Command("magick", "-limit", "thread", "1",
cmd := exec.Command("convert", "-limit", "thread", "1",
// Do not invite the OOM killer, a particularly unpleasant guest.
"-limit", "memory", memoryLimit,
@@ -2502,6 +2725,7 @@ var commands = map[string]struct {
"tag": {cmdTag, "GD SPACE [DESCRIPTION]", "Import tags."},
"sync": {cmdSync, "GD ROOT...", "Synchronise with the filesystem."},
"remove": {cmdRemove, "GD PATH...", "Remove database subtrees."},
"forget": {cmdForget, "GD SHA1...", "Dispose of orphans."},
"check": {cmdCheck, "GD", "Run consistency checks."},
"thumbnail": {cmdThumbnail, "GD [SHA1...]", "Generate thumbnails."},
"dhash": {cmdDhash, "GD [SHA1...]", "Compute perceptual hashes."},
@@ -2565,6 +2789,9 @@ func main() {
// Note that the database object has a closing finalizer,
// we just additionally print any errors coming from there.
if db != nil {
if _, err := db.Exec(`PRAGMA optimize`); err != nil {
log.Println(err)
}
if err := db.Close(); err != nil {
log.Println(err)
}

View File

@@ -10,7 +10,7 @@ function call(method, params) {
callActive++
return m.request({
method: "POST",
url: `/api/${method}`,
url: `api/${method}`,
body: params,
}).then(result => {
callActive--
@@ -98,7 +98,7 @@ let Thumbnail = {
if (!e.thumbW || !e.thumbH)
return m('.thumbnail.missing', {...vnode.attrs, info: null})
return m('img.thumbnail', {...vnode.attrs, info: null,
src: `/thumb/${e.sha1}`, width: e.thumbW, height: e.thumbH,
src: `thumb/${e.sha1}`, width: e.thumbW, height: e.thumbH,
loading})
},
}
@@ -472,13 +472,15 @@ let ViewBar = {
m('ul', ViewModel.paths.map(path =>
m('li', m(ViewBarPath, {path})))),
m('h2', "Tags"),
Object.entries(ViewModel.tags).map(([space, tags]) => [
m("h3", m(m.route.Link, {href: `/tags/${space}`}, space)),
m("ul.tags", Object.entries(tags)
.sort(([t1, w1], [t2, w2]) => (w2 - w1))
.map(([tag, score]) =>
m(ScoredTag, {space, tagname: tag, score}))),
]),
Object.entries(ViewModel.tags).map(([space, tags]) =>
m('details[open]', [
m('summary', m("h3",
m(m.route.Link, {href: `/tags/${space}`}, space))),
m("ul.tags", Object.entries(tags)
.sort(([t1, w1], [t2, w2]) => (w2 - w1))
.map(([tag, score]) =>
m(ScoredTag, {space, tagname: tag, score}))),
])),
])
},
}
@@ -492,7 +494,7 @@ let View = {
view(vnode) {
const view = m('.view', [
ViewModel.sha1 !== undefined
? m('img', {src: `/image/${ViewModel.sha1}`,
? m('img', {src: `image/${ViewModel.sha1}`,
width: ViewModel.width, height: ViewModel.height})
: "No image.",
])
@@ -609,13 +611,14 @@ let SearchRelated = {
view(vnode) {
return Object.entries(SearchModel.related)
.sort((a, b) => a[0].localeCompare(b[0]))
.map(([space, tags]) => [
m('h2', space),
.map(([space, tags]) => m('details[open]', [
m('summary', m('h2',
m(m.route.Link, {href: `/tags/${space}`}, space))),
m('ul.tags', tags
.sort((a, b) => (b.score - a.score))
.map(({tag, score}) =>
m(ScoredTag, {space, tagname: tag, score}))),
])
]))
},
}
@@ -646,7 +649,11 @@ let Search = {
m(Header),
m('.body', {}, [
m('.sidebar', [
m('p', SearchModel.query),
m('input', {
value: SearchModel.query,
onchange: event => m.route.set(
`/search/:key`, {key: event.target.value}),
}),
m(SearchRelated),
]),
m(SearchView),

View File

@@ -24,11 +24,15 @@ a { color: inherit; }
.header .activity { padding: .25rem .5rem; align-self: center; color: #fff; }
.header .activity.error { color: #f00; }
summary h2, summary h3 { display: inline-block; }
.sidebar { padding: .25rem .5rem; background: var(--shade-color);
border-right: 1px solid #ccc; overflow: auto;
min-width: 10rem; max-width: 20rem; flex-shrink: 0; }
.sidebar input { width: 100%; box-sizing: border-box; margin: .5rem 0;
font-size: inherit; }
.sidebar h2 { margin: 0.5em 0 0.25em 0; padding: 0; font-size: 1.2rem; }
.sidebar ul { margin: .5rem 0; padding: 0; }
.sidebar ul { margin: 0; padding: 0; }
.sidebar .path { margin: .5rem -.5rem; }
.sidebar .path li { margin: 0; padding: 0; }
@@ -79,7 +83,7 @@ img.thumbnail, .thumbnail.missing { box-shadow: 0 0 3px rgba(0, 0, 0, 0.75);
.viewbar { padding: .25rem .5rem; background: #eee;
border-left: 1px solid #ccc; min-width: 20rem; overflow: auto; }
.viewbar h2 { margin: 0.5em 0 0.25em 0; padding: 0; font-size: 1.2rem; }
.viewbar h3 { margin: 0.25em 0; padding: 0; font-size: 1.1rem; }
.viewbar h3 { margin: 0.5em 0 0.25em 0; padding: 0; font-size: 1.1rem; }
.viewbar ul { margin: 0; padding: 0 0 0 1.25em; list-style-type: "- "; }
.viewbar ul.tags { padding: 0; list-style-type: none; }
.viewbar li { margin: 0; padding: 0; }

View File

@@ -16,6 +16,9 @@ sha1duplicate=$sha1
cp $input/Test/dhash.png \
$input/Test/multiple-paths.png
gen -seed 15 -size 256x256 plasma:fractal \
$input/Test/excluded.png
gen -seed 20 -size 160x128 plasma:fractal \
-bordercolor transparent -border 64 \
$input/Test/transparent-wide.png
@@ -36,7 +39,7 @@ gen $input/Test/animation-small.gif \
$input/Test/video.mp4
./gallery init $target
./gallery sync $target $input "$@"
./gallery sync -exclude '/excluded[.]' $target $input "$@"
./gallery thumbnail $target
./gallery dhash $target
./gallery tag $target test "Test space" <<-END
@@ -47,7 +50,7 @@ END
# TODO: Test all the various possible sync transitions.
mv $input/Test $input/Plasma
./gallery sync $target $input
./gallery sync -exclude '/excluded[.]' $target $input
./gallery web $target :8080 &
web=$!