gallery: optimize the related tags query

This commit is contained in:
Přemysl Eric Janouch 2024-01-22 15:06:53 +01:00
parent 7300773b96
commit f9f22ba42c
Signed by: p
GPG Key ID: A0420B94F92B9493
1 changed file with 60 additions and 28 deletions

88
main.go
View File

@ -965,22 +965,12 @@ const searchCTE = `WITH
JOIN image AS i ON i.sha1 = ta.sha1 JOIN image AS i ON i.sha1 = ta.sha1
WHERE ta.tag = ? WHERE ta.tag = ?
), ),
supertags(tag) AS ( supertags(tag, space, name) AS (
SELECT DISTINCT ta.tag SELECT DISTINCT ta.tag, ts.name, t.name
FROM tag_assignment AS ta FROM tag_assignment AS ta
JOIN matches AS m ON m.sha1 = ta.sha1 JOIN matches AS m ON m.sha1 = ta.sha1
), JOIN tag AS t ON ta.tag = t.id
scoredtags(tag, score) AS ( JOIN tag_space AS ts ON ts.id = t.space
-- The cross join is a deliberate optimization,
-- and this query may still be really slow.
SELECT st.tag, AVG(IFNULL(ta.weight, 0)) AS score
FROM matches AS m
CROSS JOIN supertags AS st
LEFT JOIN tag_assignment AS ta
ON ta.sha1 = m.sha1 AND ta.tag = st.tag
GROUP BY st.tag
-- Using the column alias doesn't fail, but it also doesn't work.
HAVING AVG(IFNULL(ta.weight, 0)) >= 0.01
) )
` `
@ -1012,32 +1002,73 @@ func getTagMatches(tag int64) (matches []webTagMatch, err error) {
return matches, rows.Err() return matches, rows.Err()
} }
type webTagRelated struct { type webTagSupertag struct {
Tag string `json:"tag"` space string
Score float32 `json:"score"` tag string
score float32
} }
func getTagRelated(tag int64) (result map[string][]webTagRelated, err error) { func getTagSupertags(tag int64) (result map[int64]*webTagSupertag, err error) {
rows, err := db.Query(searchCTE+` rows, err := db.Query(searchCTE+`
SELECT ts.name, t.name, st.score FROM scoredtags AS st SELECT tag, space, name FROM supertags`, tag)
JOIN tag AS t ON st.tag = t.id
JOIN tag_space AS ts ON ts.id = t.space
ORDER BY st.score DESC`, tag)
if err != nil { if err != nil {
return nil, err return nil, err
} }
defer rows.Close() defer rows.Close()
result = make(map[string][]webTagRelated) result = make(map[int64]*webTagSupertag)
for rows.Next() { for rows.Next() {
var ( var (
space string tag int64
r webTagRelated st webTagSupertag
) )
if err = rows.Scan(&space, &r.Tag, &r.Score); err != nil { if err = rows.Scan(&tag, &st.space, &st.tag); err != nil {
return nil, err return nil, err
} }
result[space] = append(result[space], r) result[tag] = &st
}
return result, rows.Err()
}
// webTagRelated is a single related-tag entry in its JSON-serializable form.
type webTagRelated struct {
	// Tag is the name of the related tag; encoded under the "tag" key.
	Tag string `json:"tag"`
	// Score is the relatedness score; encoded under the "score" key.
	Score float32 `json:"score"`
}
func getTagRelated(tag int64, matches int) (
result map[string][]webTagRelated, err error) {
// Not sure if this level of efficiency is achievable directly in SQL.
supertags, err := getTagSupertags(tag)
if err != nil {
return nil, err
}
rows, err := db.Query(searchCTE+`
SELECT ta.tag, ta.weight
FROM tag_assignment AS ta
JOIN matches AS m ON m.sha1 = ta.sha1`, tag)
if err != nil {
return nil, err
}
defer rows.Close()
for rows.Next() {
var (
tag int64
weight float32
)
if err = rows.Scan(&tag, &weight); err != nil {
return nil, err
}
supertags[tag].score += weight
}
result = make(map[string][]webTagRelated)
for _, info := range supertags {
if score := info.score / float32(matches); score >= 0.1 {
r := webTagRelated{Tag: info.tag, Score: score}
result[info.space] = append(result[info.space], r)
}
} }
return result, rows.Err() return result, rows.Err()
} }
@ -1075,7 +1106,8 @@ func handleAPISearch(w http.ResponseWriter, r *http.Request) {
http.Error(w, err.Error(), http.StatusInternalServerError) http.Error(w, err.Error(), http.StatusInternalServerError)
return return
} }
if result.Related, err = getTagRelated(tagID); err != nil { if result.Related, err = getTagRelated(tagID,
len(result.Matches)); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError) http.Error(w, err.Error(), http.StatusInternalServerError)
return return
} }