Mostly done, first cut

This commit is contained in:
2025-05-01 23:39:51 +09:30
parent badbe5e92f
commit 58b6692d1b
11 changed files with 248 additions and 156 deletions

View File

@@ -3,9 +3,11 @@ package db
import (
"errors"
"fmt"
"html/template"
"io"
"log"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
@@ -25,9 +27,9 @@ type BookmarkManager struct {
}
// SearchOptions carries the parameters passed to BookmarkManager.Search.
//
// NOTE(review): Query is listed twice in this view, which looks like the
// previous and current field lists shown together - confirm against the
// real file before relying on the field set below.
type SearchOptions struct {
Query string
Tags []string
Sort string
All bool
Query string
Results int
}
func NewBookmarkManager(db *DB) *BookmarkManager {
@@ -70,15 +72,15 @@ func (m *BookmarkManager) DeleteBookmark(bm *entity.Bookmark) error {
}
// ListBookmarks returns all bookmarks held in the store.
//
// A failed store lookup is reported through the error result (wrapping the
// underlying error) rather than by panicking, so callers can handle it like
// any other error returned by the manager.
func (m *BookmarkManager) ListBookmarks() ([]entity.Bookmark, error) {
	bookmarks := make([]entity.Bookmark, 0)
	// An empty query matches every stored bookmark.
	if err := m.db.store.Find(&bookmarks, &bolthold.Query{}); err != nil {
		return nil, fmt.Errorf("could not list bookmarks: %w", err)
	}
	log.Printf("found %d bookmarks", len(bookmarks))
	return bookmarks, nil
}
// func (m *BookmarkManager) ListBookmarks() ([]entity.Bookmark, error) {
// bookmarks := make([]entity.Bookmark, 0)
// err := m.db.store.Find(&bookmarks, &bolthold.Query{})
// if err != nil {
// panic(err)
// }
// log.Printf("found %d bookmarks", len(bookmarks))
// return bookmarks, nil
// }
// ExportBookmarks exports all bookmarks to an io.Writer
func (m *BookmarkManager) ExportBookmarks(w io.Writer) error {
@@ -111,30 +113,47 @@ func (m *BookmarkManager) LoadBookmarkByID(id uint64) entity.Bookmark {
return ret
}
func (m *BookmarkManager) Search(opts SearchOptions) ([]entity.Bookmark, error) {
found := []entity.Bookmark{}
log.Printf("search with query: %s", opts.Query)
if opts.Sort != "" {
panic("unimplemented sort")
}
if len(opts.Tags) > 0 {
panic("unimplemented tags")
func (m *BookmarkManager) Search(opts SearchOptions) ([]entity.BookmarkSearchResult, error) {
found := []entity.BookmarkSearchResult{}
if opts.All && opts.Query != "" {
panic("can't fetch all with query")
}
sr, err := m.db.bleve.Search(bleve.NewSearchRequest(
query.NewQueryStringQuery(opts.Query)))
var q query.Query
if opts.All {
q = bleve.NewMatchAllQuery()
} else {
q = bleve.NewDisjunctionQuery(
bleve.NewMatchQuery(opts.Query),
bleve.NewTermQuery(opts.Query),
)
}
req := bleve.NewSearchRequest(q)
if opts.Results > 0 {
req.Size = opts.Results
}
req.Highlight = bleve.NewHighlightWithStyle("html")
sr, err := m.db.bleve.Search(req)
if err != nil {
panic(err)
}
log.Printf("total: %d", sr.Total)
log.Printf("string: %s", sr.String())
// log.Printf("%#v", m.db.bleve.StatsMap())
if sr.Total > 0 {
for _, dm := range sr.Hits {
log.Printf("hit: %s => %s", dm.ID, dm.String())
id, _ := strconv.ParseUint(dm.ID, 10, 64)
found = append(found, m.LoadBookmarkByID(id))
bm := m.LoadBookmarkByID(id)
bsr := entity.BookmarkSearchResult{
Bookmark: bm,
Score: dm.Score,
Highlight: template.HTML(strings.Join(dm.Fragments["Info.RawText"], "\n")),
}
found = append(found, bsr)
}
}
@@ -255,5 +274,25 @@ func (m *BookmarkManager) Stats() (entity.DBStats, error) {
return stats, fmt.Errorf("could not load db file size: %s", err)
}
stats.FileSize = int(fi.Size())
indexSize, err := getBleveIndexSize(m.db.file + ".bleve")
if err != nil {
return entity.DBStats{}, err
}
stats.IndexSize = int(indexSize)
return stats, nil
}
// getBleveIndexSize walks the file tree rooted at path and returns the summed
// size, in bytes, of every entry that is not a directory.
//
// The first error met during the walk stops it and is returned together with
// whatever total had been accumulated up to that point.
func getBleveIndexSize(path string) (int64, error) {
	var total int64
	walkErr := filepath.Walk(path, func(_ string, fi os.FileInfo, werr error) error {
		switch {
		case werr != nil:
			// Checked first: fi may be unusable when the walk reports an error.
			return werr
		case fi.IsDir():
			// Directories contribute nothing themselves; only their contents do.
			return nil
		}
		total += fi.Size()
		return nil
	})
	return total, walkErr
}

View File

@@ -5,6 +5,9 @@ import (
"time"
"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/analysis/analyzer/keyword"
"github.com/blevesearch/bleve/v2/analysis/lang/en"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/tardisx/linkwallet/entity"
bolthold "github.com/timshannon/bolthold"
@@ -16,6 +19,7 @@ type DB struct {
bleve bleve.Index
}
// Open opens the bookmark boltdb, and the bleve index.
func (db *DB) Open(path string) error {
// options := bolthold.DefaultOptions
// options.Dir = dir
@@ -47,20 +51,27 @@ func (db *DB) Open(path string) error {
}
// createIndexMapping builds the bleve index mapping for bookmarks. It
// registers a "bookmark" document mapping with URL and Tags fields plus an
// "Info" sub-document carrying Title, Size and RawText, and returns the
// resulting index mapping.
func createIndexMapping() mapping.IndexMapping {
indexMapping := bleve.NewIndexMapping()
// a reusable text field mapping that uses bleve's English analyzer
englishTextFieldMapping := bleve.NewTextFieldMapping()
englishTextFieldMapping.Analyzer = en.AnalyzerName
// a generic reusable mapping for keyword text
keywordFieldMapping := bleve.NewTextFieldMapping()
keywordFieldMapping.Analyzer = keyword.Name
// mapping for the sub-document attached below under "Info"
pageInfoMapping := bleve.NewDocumentMapping()
pageInfoMapping.AddFieldMappingsAt("Title", bleve.NewTextFieldMapping())
pageInfoMapping.AddFieldMappingsAt("Title", englishTextFieldMapping)
pageInfoMapping.AddFieldMappingsAt("Size", bleve.NewNumericFieldMapping())
pageInfoMapping.AddFieldMappingsAt("RawText", bleve.NewTextFieldMapping())
pageInfoMapping.AddFieldMappingsAt("RawText", englishTextFieldMapping)
// top-level mapping for the bookmark document itself
bookmarkMapping := bleve.NewDocumentMapping()
bookmarkMapping.AddFieldMappingsAt("URL", bleve.NewTextFieldMapping())
bookmarkMapping.AddFieldMappingsAt("Tags", bleve.NewTextFieldMapping())
bookmarkMapping.AddFieldMappingsAt("Tags", keywordFieldMapping)
bookmarkMapping.AddSubDocumentMapping("Info", pageInfoMapping)
// register the bookmark mapping under the "bookmark" document type
indexMapping.AddDocumentMapping("bookmark", bookmarkMapping)
return indexMapping
}
@@ -111,17 +122,11 @@ func (db *DB) UpdateBookmarkStats() error {
}
// count bookmarks and words indexed
bmI := entity.Bookmark{}
wiI := entity.WordIndex{}
bookmarkCount, err := db.store.TxCount(txn, &bmI, &bolthold.Query{})
if err != nil {
txn.Rollback()
return fmt.Errorf("could not get bookmark count: %s", err)
}
indexWordCount, err := db.store.TxCount(txn, &wiI, &bolthold.Query{})
if err != nil {
txn.Rollback()
return fmt.Errorf("could not get index word count: %s", err)
}
// bucket these stats by day
now := time.Now().Truncate(time.Hour * 24)
@@ -135,7 +140,7 @@ func (db *DB) UpdateBookmarkStats() error {
if stats.History == nil {
stats.History = make(map[time.Time]entity.BookmarkInfo)
}
stats.History[now] = entity.BookmarkInfo{Bookmarks: bookmarkCount, IndexedWords: indexWordCount}
stats.History[now] = entity.BookmarkInfo{Bookmarks: bookmarkCount}
err = db.store.TxUpsert(txn, "stats", &stats)
if err != nil {
txn.Rollback()

View File

@@ -5,6 +5,10 @@ import (
"net/http/httptest"
"os"
"testing"
"time"
"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/search/query"
"github.com/tardisx/linkwallet/entity"
)
@@ -141,3 +145,113 @@ func TestTagIndexing(t *testing.T) {
t.Error("did not get one id for sloth")
}
}
// testBM returns the fixed bookmark fixture that the mapping tests index.
// Every timestamp field is left at the zero time.Time.
func testBM() entity.Bookmark {
	var zero time.Time

	info := entity.PageInfo{
		Fetched:    zero,
		Title:      "one web",
		Size:       200,
		StatusCode: 200,
		RawText:    "one web site is great for all humans",
	}

	// The second tag deliberately contains a space.
	tags := []string{"hello", "big friends"}

	fixture := entity.Bookmark{
		ID:                   1,
		URL:                  "https://one.com",
		Info:                 info,
		Tags:                 tags,
		PreserveTitle:        false,
		TimestampCreated:     zero,
		TimestampLastScraped: zero,
	}
	return fixture
}
func TestMappings(t *testing.T) {
mapping := createIndexMapping()
idx, err := bleve.NewMemOnly(mapping)
if err != nil {
t.Error(err)
t.FailNow()
}
bm := testBM()
err = idx.Index("1", bm)
if err != nil {
panic(err)
}
type tc struct {
query query.Query
expHits int
}
tcs := []tc{
{query: bleve.NewMatchQuery("human"), expHits: 1},
{query: bleve.NewMatchQuery("humanoid"), expHits: 0},
{query: bleve.NewMatchQuery("hello"), expHits: 1},
{query: bleve.NewMatchQuery("big"), expHits: 0},
{query: bleve.NewMatchQuery("friends"), expHits: 0},
{query: bleve.NewMatchQuery("big friend"), expHits: 0},
{query: bleve.NewTermQuery("big friends"), expHits: 1},
{query: bleve.NewMatchQuery("web great"), expHits: 1},
}
for i := range tcs {
q := tcs[i].query
sr, err := idx.Search(bleve.NewSearchRequest(q))
if err != nil {
t.Error(err)
} else {
if len(sr.Hits) != tcs[i].expHits {
t.Errorf("wrong hits - expected %d got %d for %s", tcs[i].expHits, len(sr.Hits), tcs[i].query)
}
}
}
}
func TestMappingsDisjunctionQuery(t *testing.T) {
mapping := createIndexMapping()
idx, err := bleve.NewMemOnly(mapping)
if err != nil {
t.Error(err)
t.FailNow()
}
bm := testBM()
err = idx.Index("1", bm)
if err != nil {
panic(err)
}
type tc struct {
query string
expHits int
}
tcs := []tc{
{query: "human", expHits: 1},
{query: "humanoid", expHits: 0},
{query: "hello", expHits: 1},
{query: "big", expHits: 0},
{query: "friends", expHits: 0},
{query: "big friend", expHits: 0},
{query: "big friends", expHits: 1},
{query: "web great", expHits: 1},
}
for i := range tcs {
q := tcs[i].query
req := bleve.NewDisjunctionQuery(
bleve.NewMatchQuery(q),
bleve.NewTermQuery(q),
)
sr, err := idx.Search(bleve.NewSearchRequest(req))
if err != nil {
t.Error(err)
} else {
if len(sr.Hits) != tcs[i].expHits {
t.Errorf("wrong hits - expected %d got %d for %s", tcs[i].expHits, len(sr.Hits), tcs[i].query)
}
}
}
}