From 58b6692d1b397de91f3531c35c6518fb58ad285a Mon Sep 17 00:00:00 2001 From: Justin Hawkins Date: Thu, 1 May 2025 23:39:51 +0930 Subject: [PATCH] Mostly done, first cut --- db/bookmarks.go | 91 ++++++++++---- db/db.go | 27 +++-- db/index_test.go | 114 ++++++++++++++++++ entity/bookmark.go | 19 ++- entity/index.go | 38 ------ entity/meta.go | 12 +- web/templates/info.html | 5 +- web/templates/manage_results.html | 22 ++-- .../manage_results_column_header.html | 3 - web/templates/search_results.html | 5 +- web/web.go | 68 ++--------- 11 files changed, 248 insertions(+), 156 deletions(-) delete mode 100644 web/templates/manage_results_column_header.html diff --git a/db/bookmarks.go b/db/bookmarks.go index 509099d..5adc509 100644 --- a/db/bookmarks.go +++ b/db/bookmarks.go @@ -3,9 +3,11 @@ package db import ( "errors" "fmt" + "html/template" "io" "log" "os" + "path/filepath" "strconv" "strings" "sync" @@ -25,9 +27,9 @@ type BookmarkManager struct { } type SearchOptions struct { - Query string - Tags []string - Sort string + All bool + Query string + Results int } func NewBookmarkManager(db *DB) *BookmarkManager { @@ -70,15 +72,15 @@ func (m *BookmarkManager) DeleteBookmark(bm *entity.Bookmark) error { } // ListBookmarks returns all bookmarks. -func (m *BookmarkManager) ListBookmarks() ([]entity.Bookmark, error) { - bookmarks := make([]entity.Bookmark, 0) - err := m.db.store.Find(&bookmarks, &bolthold.Query{}) - if err != nil { - panic(err) - } - log.Printf("found %d bookmarks", len(bookmarks)) - return bookmarks, nil -} +// func (m *BookmarkManager) ListBookmarks() ([]entity.Bookmark, error) { +// bookmarks := make([]entity.Bookmark, 0) +// err := m.db.store.Find(&bookmarks, &bolthold.Query{}) +// if err != nil { +// panic(err) +// } +// log.Printf("found %d bookmarks", len(bookmarks)) +// return bookmarks, nil +// } // ExportBookmarks exports all bookmarks to an io.Writer func (m *BookmarkManager) ExportBookmarks(w io.Writer) error { @@ -111,30 +113,47 @@ func (m *BookmarkManager) LoadBookmarkByID(id uint64) entity.Bookmark { return ret } -func (m *BookmarkManager) Search(opts SearchOptions) ([]entity.Bookmark, error) { - found := []entity.Bookmark{} - log.Printf("search with query: %s", opts.Query) - if opts.Sort != "" { - panic("unimplemented sort") - } - if len(opts.Tags) > 0 { - panic("unimplemented tags") +func (m *BookmarkManager) Search(opts SearchOptions) ([]entity.BookmarkSearchResult, error) { + found := []entity.BookmarkSearchResult{} + if opts.All && opts.Query != "" { + panic("can't fetch all with query") } - sr, err := m.db.bleve.Search(bleve.NewSearchRequest( - query.NewQueryStringQuery(opts.Query))) + var q query.Query + + if opts.All { + q = bleve.NewMatchAllQuery() + } else { + + q = bleve.NewDisjunctionQuery( + bleve.NewMatchQuery(opts.Query), + bleve.NewTermQuery(opts.Query), + ) + } + + req := bleve.NewSearchRequest(q) + if opts.Results > 0 { + req.Size = opts.Results + } + req.Highlight = bleve.NewHighlightWithStyle("html") + + sr, err := m.db.bleve.Search(req) if err != nil { panic(err) } - log.Printf("total: %d", sr.Total) - log.Printf("string: %s", sr.String()) // log.Printf("%#v", m.db.bleve.StatsMap()) if sr.Total > 0 { for _, dm := range sr.Hits { - log.Printf("hit: %s => %s", dm.ID, dm.String()) + id, _ := strconv.ParseUint(dm.ID, 10, 64) - found = append(found, m.LoadBookmarkByID(id)) + bm := m.LoadBookmarkByID(id) + bsr := entity.BookmarkSearchResult{ + Bookmark: bm, + Score: dm.Score, + Highlight: template.HTML(strings.Join(dm.Fragments["Info.RawText"], "\n")), + } + found = append(found, bsr) } } @@ -255,5 +274,25 @@ func (m *BookmarkManager) Stats() (entity.DBStats, error) { return stats, fmt.Errorf("could not load db file size: %s", err) } stats.FileSize = int(fi.Size()) + indexSize, err := getBleveIndexSize(m.db.file + ".bleve") + if err != nil { + return entity.DBStats{}, err + } + stats.IndexSize = int(indexSize) + return stats, nil } + +func getBleveIndexSize(path string) (int64, error) { + var size int64 + err := filepath.Walk(path, func(_ string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if !info.IsDir() { + size += info.Size() + } + return nil + }) + return size, err +} diff --git a/db/db.go b/db/db.go index 244f232..c5d0a7d 100644 --- a/db/db.go +++ b/db/db.go @@ -5,6 +5,9 @@ import ( "time" "github.com/blevesearch/bleve/v2" + + "github.com/blevesearch/bleve/v2/analysis/analyzer/keyword" + "github.com/blevesearch/bleve/v2/analysis/lang/en" "github.com/blevesearch/bleve/v2/mapping" "github.com/tardisx/linkwallet/entity" bolthold "github.com/timshannon/bolthold" @@ -16,6 +19,7 @@ type DB struct { bleve bleve.Index } +// Open opens the bookmark boltdb, and the bleve index. func (db *DB) Open(path string) error { // options := bolthold.DefaultOptions // options.Dir = dir @@ -47,20 +51,27 @@ func (db *DB) Open(path string) error { } func createIndexMapping() mapping.IndexMapping { - indexMapping := bleve.NewIndexMapping() + englishTextFieldMapping := bleve.NewTextFieldMapping() + englishTextFieldMapping.Analyzer = en.AnalyzerName + + // a generic reusable mapping for keyword text + keywordFieldMapping := bleve.NewTextFieldMapping() + keywordFieldMapping.Analyzer = keyword.Name + pageInfoMapping := bleve.NewDocumentMapping() - pageInfoMapping.AddFieldMappingsAt("Title", bleve.NewTextFieldMapping()) + pageInfoMapping.AddFieldMappingsAt("Title", englishTextFieldMapping) pageInfoMapping.AddFieldMappingsAt("Size", bleve.NewNumericFieldMapping()) - pageInfoMapping.AddFieldMappingsAt("RawText", bleve.NewTextFieldMapping()) + pageInfoMapping.AddFieldMappingsAt("RawText", englishTextFieldMapping) bookmarkMapping := bleve.NewDocumentMapping() bookmarkMapping.AddFieldMappingsAt("URL", bleve.NewTextFieldMapping()) - bookmarkMapping.AddFieldMappingsAt("Tags", bleve.NewTextFieldMapping()) + bookmarkMapping.AddFieldMappingsAt("Tags", keywordFieldMapping) bookmarkMapping.AddSubDocumentMapping("Info", pageInfoMapping) indexMapping.AddDocumentMapping("bookmark", bookmarkMapping) + return indexMapping } @@ -111,17 +122,11 @@ func (db *DB) UpdateBookmarkStats() error { } // count bookmarks and words indexed bmI := entity.Bookmark{} - wiI := entity.WordIndex{} bookmarkCount, err := db.store.TxCount(txn, &bmI, &bolthold.Query{}) if err != nil { txn.Rollback() return fmt.Errorf("could not get bookmark count: %s", err) } - indexWordCount, err := db.store.TxCount(txn, &wiI, &bolthold.Query{}) - if err != nil { - txn.Rollback() - return fmt.Errorf("could not get index word count: %s", err) - } // bucket these stats by day now := time.Now().Truncate(time.Hour * 24) @@ -135,7 +140,7 @@ func (db *DB) UpdateBookmarkStats() error { if stats.History == nil { stats.History = make(map[time.Time]entity.BookmarkInfo) } - stats.History[now] = entity.BookmarkInfo{Bookmarks: bookmarkCount, IndexedWords: indexWordCount} + stats.History[now] = entity.BookmarkInfo{Bookmarks: bookmarkCount} err = db.store.TxUpsert(txn, "stats", &stats) if err != nil { txn.Rollback() diff --git a/db/index_test.go b/db/index_test.go index 8bf640e..e88dbba 100644 --- a/db/index_test.go +++ b/db/index_test.go @@ -5,6 +5,10 @@ import ( "net/http/httptest" "os" "testing" + "time" + + "github.com/blevesearch/bleve/v2" + "github.com/blevesearch/bleve/v2/search/query" "github.com/tardisx/linkwallet/entity" ) @@ -141,3 +145,113 @@ func TestTagIndexing(t *testing.T) { t.Error("did not get one id for sloth") } } + +func testBM() entity.Bookmark { + return entity.Bookmark{ + ID: 1, + URL: "https://one.com", + Info: entity.PageInfo{ + Fetched: time.Time{}, + Title: "one web", + Size: 200, + StatusCode: 200, + RawText: "one web site is great for all humans", + }, + Tags: []string{"hello", "big friends"}, + PreserveTitle: false, + TimestampCreated: time.Time{}, + TimestampLastScraped: time.Time{}, + } +} + +func TestMappings(t *testing.T) { + mapping := createIndexMapping() + idx, err := bleve.NewMemOnly(mapping) + if err != nil { + t.Error(err) + t.FailNow() + } + + bm := testBM() + err = idx.Index("1", bm) + if err != nil { + panic(err) + } + + type tc struct { + query query.Query + expHits int + } + tcs := []tc{ + {query: bleve.NewMatchQuery("human"), expHits: 1}, + {query: bleve.NewMatchQuery("humanoid"), expHits: 0}, + {query: bleve.NewMatchQuery("hello"), expHits: 1}, + {query: bleve.NewMatchQuery("big"), expHits: 0}, + {query: bleve.NewMatchQuery("friends"), expHits: 0}, + {query: bleve.NewMatchQuery("big friend"), expHits: 0}, + {query: bleve.NewTermQuery("big friends"), expHits: 1}, + {query: bleve.NewMatchQuery("web great"), expHits: 1}, + } + + for i := range tcs { + q := tcs[i].query + + sr, err := idx.Search(bleve.NewSearchRequest(q)) + if err != nil { + t.Error(err) + } else { + if len(sr.Hits) != tcs[i].expHits { + t.Errorf("wrong hits - expected %d got %d for %s", tcs[i].expHits, len(sr.Hits), tcs[i].query) + } + } + } + +} + +func TestMappingsDisjunctionQuery(t *testing.T) { + mapping := createIndexMapping() + idx, err := bleve.NewMemOnly(mapping) + if err != nil { + t.Error(err) + t.FailNow() + } + + bm := testBM() + err = idx.Index("1", bm) + if err != nil { + panic(err) + } + + type tc struct { + query string + expHits int + } + tcs := []tc{ + {query: "human", expHits: 1}, + {query: "humanoid", expHits: 0}, + {query: "hello", expHits: 1}, + {query: "big", expHits: 0}, + {query: "friends", expHits: 0}, + {query: "big friend", expHits: 0}, + {query: "big friends", expHits: 1}, + {query: "web great", expHits: 1}, + } + + for i := range tcs { + q := tcs[i].query + req := bleve.NewDisjunctionQuery( + bleve.NewMatchQuery(q), + bleve.NewTermQuery(q), + ) + + sr, err := idx.Search(bleve.NewSearchRequest(req)) + if err != nil { + t.Error(err) + } else { + if len(sr.Hits) != tcs[i].expHits { + t.Errorf("wrong hits - expected %d got %d for %s", tcs[i].expHits, len(sr.Hits), tcs[i].query) + } + } + } + +} diff --git a/entity/bookmark.go b/entity/bookmark.go index d859b5c..ad05adf 100644 --- a/entity/bookmark.go +++ b/entity/bookmark.go @@ -1,6 +1,9 @@ package entity -import "time" +import ( + "html/template" + "time" +) type Bookmark struct { ID uint64 `boltholdKey:"ID"` @@ -12,6 +15,10 @@ type Bookmark struct { TimestampLastScraped time.Time } +func (bm Bookmark) Type() string { + return "bookmark" +} + type PageInfo struct { Fetched time.Time Title string @@ -19,3 +26,13 @@ type PageInfo struct { StatusCode int RawText string } + +func (pi PageInfo) Type() string { + return "info" +} + +type BookmarkSearchResult struct { + Bookmark Bookmark + Score float64 + Highlight template.HTML +} diff --git a/entity/index.go b/entity/index.go index 34eb474..9356433 100644 --- a/entity/index.go +++ b/entity/index.go @@ -1,39 +1 @@ package entity - -type WordIndex struct { - Word string `bolthold:"index"` - // Bitmap roaring.Bitmap - Bitmap map[uint64]bool -} - -// func (wi WordIndex) GobEncode() ([]byte, error) { - -// bmBuf := new(bytes.Buffer) -// wi.Bitmap.WriteTo(bmBuf) // we omit error handling - -// wordBytes := []byte(wi.Word) -// serialised := make([]byte, 4, 4) -// binary.BigEndian.PutUint32(serialised, uint32(len(wordBytes))) - -// serialised = append(serialised, wordBytes...) -// serialised = append(serialised, bmBuf.Bytes()...) -// // log.Printf("serialised: %v", serialised) - -// // log.Printf("serialised to %d bytes for word %w\n%#v", len(serialised), wi.Word, serialised) - -// return serialised, nil -// } - -// func (wi *WordIndex) GobDecode(b []byte) error { -// size := binary.BigEndian.Uint32(b[0:4]) -// wi.Word = string(b[4 : size+4]) -// // log.Printf("word is %s size was %d\n%v", wi.Word, size, b) - -// bmBuf := bytes.NewReader(b[size+4:]) - -// wi.Bitmap = *roaring.New() -// _, err := wi.Bitmap.ReadFrom(bmBuf) -// // log.Printf("N: %d, err: %s", n, err) - -// return err -// } diff --git a/entity/meta.go b/entity/meta.go index fab6e1b..ceee807 100644 --- a/entity/meta.go +++ b/entity/meta.go @@ -7,14 +7,14 @@ import ( ) type DBStats struct { - History map[time.Time]BookmarkInfo - FileSize int - Searches int + History map[time.Time]BookmarkInfo + FileSize int + IndexSize int + Searches int } type BookmarkInfo struct { - Bookmarks int - IndexedWords int + Bookmarks int } func (stats DBStats) String() string { @@ -29,7 +29,7 @@ func (stats DBStats) String() string { sort.Slice(dates, func(i, j int) bool { return dates[i].Before(dates[j]) }) for _, k := range dates { - out += fmt.Sprintf("%s - %d bookmarks, %d words indexed\n", k, stats.History[k].Bookmarks, stats.History[k].IndexedWords) + out += fmt.Sprintf("%s - %d bookmarks\n", k, stats.History[k].Bookmarks) } return out } diff --git a/web/templates/info.html b/web/templates/info.html index 59d1803..a9634a5 100644 --- a/web/templates/info.html +++ b/web/templates/info.html @@ -4,15 +4,14 @@
System information
- + + -
Memory in use{{ meminfo }}
Database disk size{{ niceSizeMB .stats.FileSize }}Mb
Bookmarks DB size{{ niceSizeMB .stats.FileSize }}Mb
Bookmarks index size{{ niceSizeMB .stats.IndexSize }}Mb
Bookmarks{{ .stats.MostRecentBookmarkInfo.Bookmarks }}
Words in index{{ .stats.MostRecentBookmarkInfo.IndexedWords }}
Total searches{{ .stats.Searches }}
Database information
-
diff --git a/web/templates/manage_results.html b/web/templates/manage_results.html index 78af685..8e61d7a 100644 --- a/web/templates/manage_results.html +++ b/web/templates/manage_results.html @@ -2,29 +2,29 @@ - {{ template "manage_results_column_header.html" .column.title }} + - {{ template "manage_results_column_header.html" .column.created }} - {{ template "manage_results_column_header.html" .column.scraped }} + + - {{ range .bookmarks }} + {{ range .results }} - + - - + + {{ end }} diff --git a/web/templates/manage_results_column_header.html b/web/templates/manage_results_column_header.html deleted file mode 100644 index 230a6cf..0000000 --- a/web/templates/manage_results_column_header.html +++ /dev/null @@ -1,3 +0,0 @@ - - diff --git a/web/templates/search_results.html b/web/templates/search_results.html index c7eb805..f52b78f 100644 --- a/web/templates/search_results.html +++ b/web/templates/search_results.html @@ -1,5 +1,8 @@ \ No newline at end of file diff --git a/web/web.go b/web/web.go index c0a72c7..9f45ccf 100644 --- a/web/web.go +++ b/web/web.go @@ -50,26 +50,9 @@ type Server struct { } type ColumnInfo struct { - Name string - Param string - Sorted string - Class string -} - -func (c ColumnInfo) URLString() string { - if c.Sorted == "asc" { - return "-" + c.Param - } - return c.Param -} - -func (c ColumnInfo) TitleArrow() string { - if c.Sorted == "asc" { - return "↑" - } else if c.Sorted == "desc" { - return "↓" - } - return "" + Name string + Param string + Class string } // Create creates a new web server instance and sets up routing. @@ -126,9 +109,8 @@ func Create(bmm *db.BookmarkManager, cmm *db.ConfigManager) *Server { }) r.GET("/manage", func(c *gin.Context) { - - allBookmarks, _ := bmm.ListBookmarks() - meta := gin.H{"page": "manage", "config": config, "bookmarks": allBookmarks} + results, _ := bmm.Search(db.SearchOptions{All: true}) + meta := gin.H{"page": "manage", "config": config, "results": results} c.HTML(http.StatusOK, "_layout.html", meta, ) @@ -136,37 +118,18 @@ func Create(bmm *db.BookmarkManager, cmm *db.ConfigManager) *Server { r.POST("/manage/results", func(c *gin.Context) { query := c.PostForm("query") - sort := c.Query("sort") - bookmarks := []entity.Bookmark{} + results := make([]entity.BookmarkSearchResult, 0) if query == "" { - bookmarks, _ = bmm.ListBookmarks() + results, _ = bmm.Search(db.SearchOptions{All: true, Results: 100}) } else { - bookmarks, _ = bmm.Search(db.SearchOptions{Query: query, Sort: sort}) + results, _ = bmm.Search(db.SearchOptions{Query: query}) } - meta := gin.H{"config": config, "bookmarks": bookmarks} + meta := gin.H{"config": config, "results": results} colTitle := &ColumnInfo{Name: "Title/URL", Param: "title"} colCreated := &ColumnInfo{Name: "Created", Param: "created", Class: "show-for-large"} colScraped := &ColumnInfo{Name: "Scraped", Param: "scraped", Class: "show-for-large"} - if sort == "title" { - colTitle.Sorted = "asc" - } - if sort == "-title" { - colTitle.Sorted = "desc" - } - if sort == "scraped" { - colScraped.Sorted = "asc" - } - if sort == "-scraped" { - colScraped.Sorted = "desc" - } - if sort == "created" { - colCreated.Sorted = "asc" - } - if sort == "-created" { - colCreated.Sorted = "desc" - } cols := gin.H{ "title": colTitle, @@ -175,9 +138,7 @@ func Create(bmm *db.BookmarkManager, cmm *db.ConfigManager) *Server { } meta["column"] = cols - c.HTML(http.StatusOK, - "manage_results.html", meta, - ) + c.HTML(http.StatusOK, "manage_results.html", meta) }) @@ -466,10 +427,7 @@ func Create(bmm *db.BookmarkManager, cmm *db.ConfigManager) *Server { func plotPoints(sortedKeys []time.Time, dbStats entity.DBStats, p *plot.Plot, k string) { - if k == "indexed_words" { - p.Title.Text = "Indexed words over time" - p.Y.Label.Text = "Words indexed" - } else if k == "bookmarks" { + if k == "bookmarks" { p.Title.Text = "Bookmarks over time" p.Y.Label.Text = "Bookmarks" } else { @@ -480,9 +438,7 @@ func plotPoints(sortedKeys []time.Time, dbStats entity.DBStats, p *plot.Plot, k pts := make(plotter.XYs, len(sortedKeys)) for i := range sortedKeys { pts[i].X = float64(sortedKeys[i].Unix()) - if k == "indexed_words" { - pts[i].Y = float64(dbStats.History[sortedKeys[i]].IndexedWords) - } else if k == "bookmarks" { + if k == "bookmarks" { pts[i].Y = float64(dbStats.History[sortedKeys[i]].Bookmarks) } else { panic("bad key")
 title tagscreatedscraped
editedit - {{ .Info.Title }} + {{ .Bookmark.Info.Title }}
- {{ niceURL .URL }} + {{ niceURL .Bookmark.URL }}
- {{ range .Tags }} + {{ range .Bookmark.Tags }} {{ . }} {{ end }} {{ (nicetime .TimestampCreated).HumanDuration }} ago{{ (nicetime .TimestampLastScraped).HumanDuration }} ago{{ (nicetime .Bookmark.TimestampCreated).HumanDuration }} ago{{ (nicetime .Bookmark.TimestampLastScraped).HumanDuration }} ago - scrape + scrape
{{ .Name }} {{ .TitleArrow }} -