diff --git a/db/bookmarks.go b/db/bookmarks.go index 509099d..5adc509 100644 --- a/db/bookmarks.go +++ b/db/bookmarks.go @@ -3,9 +3,11 @@ package db import ( "errors" "fmt" + "html/template" "io" "log" "os" + "path/filepath" "strconv" "strings" "sync" @@ -25,9 +27,9 @@ type BookmarkManager struct { } type SearchOptions struct { - Query string - Tags []string - Sort string + All bool + Query string + Results int } func NewBookmarkManager(db *DB) *BookmarkManager { @@ -70,15 +72,15 @@ func (m *BookmarkManager) DeleteBookmark(bm *entity.Bookmark) error { } // ListBookmarks returns all bookmarks. -func (m *BookmarkManager) ListBookmarks() ([]entity.Bookmark, error) { - bookmarks := make([]entity.Bookmark, 0) - err := m.db.store.Find(&bookmarks, &bolthold.Query{}) - if err != nil { - panic(err) - } - log.Printf("found %d bookmarks", len(bookmarks)) - return bookmarks, nil -} +// func (m *BookmarkManager) ListBookmarks() ([]entity.Bookmark, error) { +// bookmarks := make([]entity.Bookmark, 0) +// err := m.db.store.Find(&bookmarks, &bolthold.Query{}) +// if err != nil { +// panic(err) +// } +// log.Printf("found %d bookmarks", len(bookmarks)) +// return bookmarks, nil +// } // ExportBookmarks exports all bookmarks to an io.Writer func (m *BookmarkManager) ExportBookmarks(w io.Writer) error { @@ -111,30 +113,47 @@ func (m *BookmarkManager) LoadBookmarkByID(id uint64) entity.Bookmark { return ret } -func (m *BookmarkManager) Search(opts SearchOptions) ([]entity.Bookmark, error) { - found := []entity.Bookmark{} - log.Printf("search with query: %s", opts.Query) - if opts.Sort != "" { - panic("unimplemented sort") - } - if len(opts.Tags) > 0 { - panic("unimplemented tags") +func (m *BookmarkManager) Search(opts SearchOptions) ([]entity.BookmarkSearchResult, error) { + found := []entity.BookmarkSearchResult{} + if opts.All && opts.Query != "" { + panic("can't fetch all with query") } - sr, err := m.db.bleve.Search(bleve.NewSearchRequest( - query.NewQueryStringQuery(opts.Query))) + var q query.Query + + if opts.All { + q = bleve.NewMatchAllQuery() + } else { + + q = bleve.NewDisjunctionQuery( + bleve.NewMatchQuery(opts.Query), + bleve.NewTermQuery(opts.Query), + ) + } + + req := bleve.NewSearchRequest(q) + if opts.Results > 0 { + req.Size = opts.Results + } + req.Highlight = bleve.NewHighlightWithStyle("html") + + sr, err := m.db.bleve.Search(req) if err != nil { panic(err) } - log.Printf("total: %d", sr.Total) - log.Printf("string: %s", sr.String()) // log.Printf("%#v", m.db.bleve.StatsMap()) if sr.Total > 0 { for _, dm := range sr.Hits { - log.Printf("hit: %s => %s", dm.ID, dm.String()) + id, _ := strconv.ParseUint(dm.ID, 10, 64) - found = append(found, m.LoadBookmarkByID(id)) + bm := m.LoadBookmarkByID(id) + bsr := entity.BookmarkSearchResult{ + Bookmark: bm, + Score: dm.Score, + Highlight: template.HTML(strings.Join(dm.Fragments["Info.RawText"], "\n")), + } + found = append(found, bsr) } } @@ -255,5 +274,25 @@ func (m *BookmarkManager) Stats() (entity.DBStats, error) { return stats, fmt.Errorf("could not load db file size: %s", err) } stats.FileSize = int(fi.Size()) + indexSize, err := getBleveIndexSize(m.db.file + ".bleve") + if err != nil { + return entity.DBStats{}, err + } + stats.IndexSize = int(indexSize) + return stats, nil } + +func getBleveIndexSize(path string) (int64, error) { + var size int64 + err := filepath.Walk(path, func(_ string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if !info.IsDir() { + size += info.Size() + } + return nil + }) + return size, err +} diff --git a/db/db.go b/db/db.go index 244f232..c5d0a7d 100644 --- a/db/db.go +++ b/db/db.go @@ -5,6 +5,9 @@ import ( "time" "github.com/blevesearch/bleve/v2" + + "github.com/blevesearch/bleve/v2/analysis/analyzer/keyword" + "github.com/blevesearch/bleve/v2/analysis/lang/en" "github.com/blevesearch/bleve/v2/mapping" "github.com/tardisx/linkwallet/entity" bolthold "github.com/timshannon/bolthold" @@ -16,6 +19,7 @@ type DB struct { bleve bleve.Index } +// Open opens the bookmark boltdb, and the bleve index. func (db *DB) Open(path string) error { // options := bolthold.DefaultOptions // options.Dir = dir @@ -47,20 +51,27 @@ func (db *DB) Open(path string) error { } func createIndexMapping() mapping.IndexMapping { - indexMapping := bleve.NewIndexMapping() + englishTextFieldMapping := bleve.NewTextFieldMapping() + englishTextFieldMapping.Analyzer = en.AnalyzerName + + // a generic reusable mapping for keyword text + keywordFieldMapping := bleve.NewTextFieldMapping() + keywordFieldMapping.Analyzer = keyword.Name + pageInfoMapping := bleve.NewDocumentMapping() - pageInfoMapping.AddFieldMappingsAt("Title", bleve.NewTextFieldMapping()) + pageInfoMapping.AddFieldMappingsAt("Title", englishTextFieldMapping) pageInfoMapping.AddFieldMappingsAt("Size", bleve.NewNumericFieldMapping()) - pageInfoMapping.AddFieldMappingsAt("RawText", bleve.NewTextFieldMapping()) + pageInfoMapping.AddFieldMappingsAt("RawText", englishTextFieldMapping) bookmarkMapping := bleve.NewDocumentMapping() bookmarkMapping.AddFieldMappingsAt("URL", bleve.NewTextFieldMapping()) - bookmarkMapping.AddFieldMappingsAt("Tags", bleve.NewTextFieldMapping()) + bookmarkMapping.AddFieldMappingsAt("Tags", keywordFieldMapping) bookmarkMapping.AddSubDocumentMapping("Info", pageInfoMapping) indexMapping.AddDocumentMapping("bookmark", bookmarkMapping) + return indexMapping } @@ -111,17 +122,11 @@ func (db *DB) UpdateBookmarkStats() error { } // count bookmarks and words indexed bmI := entity.Bookmark{} - wiI := entity.WordIndex{} bookmarkCount, err := db.store.TxCount(txn, &bmI, &bolthold.Query{}) if err != nil { txn.Rollback() return fmt.Errorf("could not get bookmark count: %s", err) } - indexWordCount, err := db.store.TxCount(txn, &wiI, &bolthold.Query{}) - if err != nil { - txn.Rollback() - return fmt.Errorf("could not get index word count: %s", err) - } // bucket these stats by day now := time.Now().Truncate(time.Hour * 24) @@ -135,7 +140,7 @@ func (db *DB) UpdateBookmarkStats() error { if stats.History == nil { stats.History = make(map[time.Time]entity.BookmarkInfo) } - stats.History[now] = entity.BookmarkInfo{Bookmarks: bookmarkCount, IndexedWords: indexWordCount} + stats.History[now] = entity.BookmarkInfo{Bookmarks: bookmarkCount} err = db.store.TxUpsert(txn, "stats", &stats) if err != nil { txn.Rollback() diff --git a/db/index_test.go b/db/index_test.go index 8bf640e..e88dbba 100644 --- a/db/index_test.go +++ b/db/index_test.go @@ -5,6 +5,10 @@ import ( "net/http/httptest" "os" "testing" + "time" + + "github.com/blevesearch/bleve/v2" + "github.com/blevesearch/bleve/v2/search/query" "github.com/tardisx/linkwallet/entity" ) @@ -141,3 +145,113 @@ func TestTagIndexing(t *testing.T) { t.Error("did not get one id for sloth") } } + +func testBM() entity.Bookmark { + return entity.Bookmark{ + ID: 1, + URL: "https://one.com", + Info: entity.PageInfo{ + Fetched: time.Time{}, + Title: "one web", + Size: 200, + StatusCode: 200, + RawText: "one web site is great for all humans", + }, + Tags: []string{"hello", "big friends"}, + PreserveTitle: false, + TimestampCreated: time.Time{}, + TimestampLastScraped: time.Time{}, + } +} + +func TestMappings(t *testing.T) { + mapping := createIndexMapping() + idx, err := bleve.NewMemOnly(mapping) + if err != nil { + t.Error(err) + t.FailNow() + } + + bm := testBM() + err = idx.Index("1", bm) + if err != nil { + panic(err) + } + + type tc struct { + query query.Query + expHits int + } + tcs := []tc{ + {query: bleve.NewMatchQuery("human"), expHits: 1}, + {query: bleve.NewMatchQuery("humanoid"), expHits: 0}, + {query: bleve.NewMatchQuery("hello"), expHits: 1}, + {query: bleve.NewMatchQuery("big"), expHits: 0}, + {query: bleve.NewMatchQuery("friends"), expHits: 0}, + {query: bleve.NewMatchQuery("big friend"), expHits: 0}, + {query: bleve.NewTermQuery("big friends"), expHits: 1}, + {query: bleve.NewMatchQuery("web great"), expHits: 1}, + } + + for i := range tcs { + q := tcs[i].query + + sr, err := idx.Search(bleve.NewSearchRequest(q)) + if err != nil { + t.Error(err) + } else { + if len(sr.Hits) != tcs[i].expHits { + t.Errorf("wrong hits - expected %d got %d for %s", tcs[i].expHits, len(sr.Hits), tcs[i].query) + } + } + } + +} + +func TestMappingsDisjunctionQuery(t *testing.T) { + mapping := createIndexMapping() + idx, err := bleve.NewMemOnly(mapping) + if err != nil { + t.Error(err) + t.FailNow() + } + + bm := testBM() + err = idx.Index("1", bm) + if err != nil { + panic(err) + } + + type tc struct { + query string + expHits int + } + tcs := []tc{ + {query: "human", expHits: 1}, + {query: "humanoid", expHits: 0}, + {query: "hello", expHits: 1}, + {query: "big", expHits: 0}, + {query: "friends", expHits: 0}, + {query: "big friend", expHits: 0}, + {query: "big friends", expHits: 1}, + {query: "web great", expHits: 1}, + } + + for i := range tcs { + q := tcs[i].query + req := bleve.NewDisjunctionQuery( + bleve.NewMatchQuery(q), + bleve.NewTermQuery(q), + ) + + sr, err := idx.Search(bleve.NewSearchRequest(req)) + if err != nil { + t.Error(err) + } else { + if len(sr.Hits) != tcs[i].expHits { + t.Errorf("wrong hits - expected %d got %d for %s", tcs[i].expHits, len(sr.Hits), tcs[i].query) + } + } + } + +} diff --git a/entity/bookmark.go b/entity/bookmark.go index d859b5c..ad05adf 100644 --- a/entity/bookmark.go +++ b/entity/bookmark.go @@ -1,6 +1,9 @@ package entity -import "time" +import ( + "html/template" + "time" +) type Bookmark struct { ID uint64 `boltholdKey:"ID"` @@ -12,6 +15,10 @@ type Bookmark struct { TimestampLastScraped time.Time } +func (bm Bookmark) Type() string { + return "bookmark" +} + type PageInfo struct { Fetched time.Time Title string @@ -19,3 +26,13 @@ type PageInfo struct { StatusCode int RawText string } + +func (pi PageInfo) Type() string { + return "info" +} + +type BookmarkSearchResult struct { + Bookmark Bookmark + Score float64 + Highlight template.HTML +} diff --git a/entity/index.go b/entity/index.go index 34eb474..9356433 100644 --- a/entity/index.go +++ b/entity/index.go @@ -1,39 +1 @@ package entity - -type WordIndex struct { - Word string `bolthold:"index"` - // Bitmap roaring.Bitmap - Bitmap map[uint64]bool -} - -// func (wi WordIndex) GobEncode() ([]byte, error) { - -// bmBuf := new(bytes.Buffer) -// wi.Bitmap.WriteTo(bmBuf) // we omit error handling - -// wordBytes := []byte(wi.Word) -// serialised := make([]byte, 4, 4) -// binary.BigEndian.PutUint32(serialised, uint32(len(wordBytes))) - -// serialised = append(serialised, wordBytes...) -// serialised = append(serialised, bmBuf.Bytes()...) -// // log.Printf("serialised: %v", serialised) - -// // log.Printf("serialised to %d bytes for word %w\n%#v", len(serialised), wi.Word, serialised) - -// return serialised, nil -// } - -// func (wi *WordIndex) GobDecode(b []byte) error { -// size := binary.BigEndian.Uint32(b[0:4]) -// wi.Word = string(b[4 : size+4]) -// // log.Printf("word is %s size was %d\n%v", wi.Word, size, b) - -// bmBuf := bytes.NewReader(b[size+4:]) - -// wi.Bitmap = *roaring.New() -// _, err := wi.Bitmap.ReadFrom(bmBuf) -// // log.Printf("N: %d, err: %s", n, err) - -// return err -// } diff --git a/entity/meta.go b/entity/meta.go index fab6e1b..ceee807 100644 --- a/entity/meta.go +++ b/entity/meta.go @@ -7,14 +7,14 @@ import ( ) type DBStats struct { - History map[time.Time]BookmarkInfo - FileSize int - Searches int + History map[time.Time]BookmarkInfo + FileSize int + IndexSize int + Searches int } type BookmarkInfo struct { - Bookmarks int - IndexedWords int + Bookmarks int } func (stats DBStats) String() string { @@ -29,7 +29,7 @@ func (stats DBStats) String() string { sort.Slice(dates, func(i, j int) bool { return dates[i].Before(dates[j]) }) for _, k := range dates { - out += fmt.Sprintf("%s - %d bookmarks, %d words indexed\n", k, stats.History[k].Bookmarks, stats.History[k].IndexedWords) + out += fmt.Sprintf("%s - %d bookmarks\n", k, stats.History[k].Bookmarks) } return out } diff --git a/web/templates/info.html b/web/templates/info.html index 59d1803..a9634a5 100644 --- a/web/templates/info.html +++ b/web/templates/info.html @@ -4,15 +4,14 @@
Memory in use | {{ meminfo }} |
---|---|
Database disk size | {{ niceSizeMB .stats.FileSize }}Mb |
Bookmarks DB size | {{ niceSizeMB .stats.FileSize }}Mb |
Bookmarks index size | {{ niceSizeMB .stats.IndexSize }}Mb |
Bookmarks | {{ .stats.MostRecentBookmarkInfo.Bookmarks }} |
Words in index | {{ .stats.MostRecentBookmarkInfo.IndexedWords }} |
Total searches | {{ .stats.Searches }} |
- {{ template "manage_results_column_header.html" .column.title }} + | title | tags | - {{ template "manage_results_column_header.html" .column.created }} - {{ template "manage_results_column_header.html" .column.scraped }} +created | +scraped | ||||
---|---|---|---|---|---|---|---|---|
edit | +edit |
- {{ .Info.Title }}
+ {{ .Bookmark.Info.Title }}
- {{ niceURL .URL }} + {{ niceURL .Bookmark.URL }} |
- {{ range .Tags }} + {{ range .Bookmark.Tags }} {{ . }} {{ end }} | -{{ (nicetime .TimestampCreated).HumanDuration }} ago | -{{ (nicetime .TimestampLastScraped).HumanDuration }} ago | +{{ (nicetime .Bookmark.TimestampCreated).HumanDuration }} ago | +{{ (nicetime .Bookmark.TimestampLastScraped).HumanDuration }} ago | - scrape + scrape | {{ .Name }} {{ .TitleArrow }} - | diff --git a/web/templates/search_results.html b/web/templates/search_results.html index c7eb805..f52b78f 100644 --- a/web/templates/search_results.html +++ b/web/templates/search_results.html @@ -1,5 +1,8 @@