Mostly done, first cut
This commit is contained in:
@@ -3,9 +3,11 @@ package db
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"html/template"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
@@ -25,9 +27,9 @@ type BookmarkManager struct {
|
||||
}
|
||||
|
||||
type SearchOptions struct {
|
||||
Query string
|
||||
Tags []string
|
||||
Sort string
|
||||
All bool
|
||||
Query string
|
||||
Results int
|
||||
}
|
||||
|
||||
func NewBookmarkManager(db *DB) *BookmarkManager {
|
||||
@@ -70,15 +72,15 @@ func (m *BookmarkManager) DeleteBookmark(bm *entity.Bookmark) error {
|
||||
}
|
||||
|
||||
// ListBookmarks returns all bookmarks.
|
||||
func (m *BookmarkManager) ListBookmarks() ([]entity.Bookmark, error) {
|
||||
bookmarks := make([]entity.Bookmark, 0)
|
||||
err := m.db.store.Find(&bookmarks, &bolthold.Query{})
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
log.Printf("found %d bookmarks", len(bookmarks))
|
||||
return bookmarks, nil
|
||||
}
|
||||
// func (m *BookmarkManager) ListBookmarks() ([]entity.Bookmark, error) {
|
||||
// bookmarks := make([]entity.Bookmark, 0)
|
||||
// err := m.db.store.Find(&bookmarks, &bolthold.Query{})
|
||||
// if err != nil {
|
||||
// panic(err)
|
||||
// }
|
||||
// log.Printf("found %d bookmarks", len(bookmarks))
|
||||
// return bookmarks, nil
|
||||
// }
|
||||
|
||||
// ExportBookmarks exports all bookmarks to an io.Writer
|
||||
func (m *BookmarkManager) ExportBookmarks(w io.Writer) error {
|
||||
@@ -111,30 +113,47 @@ func (m *BookmarkManager) LoadBookmarkByID(id uint64) entity.Bookmark {
|
||||
return ret
|
||||
}
|
||||
|
||||
func (m *BookmarkManager) Search(opts SearchOptions) ([]entity.Bookmark, error) {
|
||||
found := []entity.Bookmark{}
|
||||
log.Printf("search with query: %s", opts.Query)
|
||||
if opts.Sort != "" {
|
||||
panic("unimplemented sort")
|
||||
}
|
||||
if len(opts.Tags) > 0 {
|
||||
panic("unimplemented tags")
|
||||
func (m *BookmarkManager) Search(opts SearchOptions) ([]entity.BookmarkSearchResult, error) {
|
||||
found := []entity.BookmarkSearchResult{}
|
||||
if opts.All && opts.Query != "" {
|
||||
panic("can't fetch all with query")
|
||||
}
|
||||
|
||||
sr, err := m.db.bleve.Search(bleve.NewSearchRequest(
|
||||
query.NewQueryStringQuery(opts.Query)))
|
||||
var q query.Query
|
||||
|
||||
if opts.All {
|
||||
q = bleve.NewMatchAllQuery()
|
||||
} else {
|
||||
|
||||
q = bleve.NewDisjunctionQuery(
|
||||
bleve.NewMatchQuery(opts.Query),
|
||||
bleve.NewTermQuery(opts.Query),
|
||||
)
|
||||
}
|
||||
|
||||
req := bleve.NewSearchRequest(q)
|
||||
if opts.Results > 0 {
|
||||
req.Size = opts.Results
|
||||
}
|
||||
req.Highlight = bleve.NewHighlightWithStyle("html")
|
||||
|
||||
sr, err := m.db.bleve.Search(req)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
log.Printf("total: %d", sr.Total)
|
||||
log.Printf("string: %s", sr.String())
|
||||
// log.Printf("%#v", m.db.bleve.StatsMap())
|
||||
|
||||
if sr.Total > 0 {
|
||||
for _, dm := range sr.Hits {
|
||||
log.Printf("hit: %s => %s", dm.ID, dm.String())
|
||||
|
||||
id, _ := strconv.ParseUint(dm.ID, 10, 64)
|
||||
found = append(found, m.LoadBookmarkByID(id))
|
||||
bm := m.LoadBookmarkByID(id)
|
||||
bsr := entity.BookmarkSearchResult{
|
||||
Bookmark: bm,
|
||||
Score: dm.Score,
|
||||
Highlight: template.HTML(strings.Join(dm.Fragments["Info.RawText"], "\n")),
|
||||
}
|
||||
found = append(found, bsr)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -255,5 +274,25 @@ func (m *BookmarkManager) Stats() (entity.DBStats, error) {
|
||||
return stats, fmt.Errorf("could not load db file size: %s", err)
|
||||
}
|
||||
stats.FileSize = int(fi.Size())
|
||||
indexSize, err := getBleveIndexSize(m.db.file + ".bleve")
|
||||
if err != nil {
|
||||
return entity.DBStats{}, err
|
||||
}
|
||||
stats.IndexSize = int(indexSize)
|
||||
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
// getBleveIndexSize returns the combined size, in bytes, of every
// non-directory entry found by walking the tree rooted at path.
//
// Any error reported during the walk (including path not existing)
// stops the walk. It is returned wrapped with the path that was being
// measured, and the size is reported as 0 so callers never see a
// partial total.
func getBleveIndexSize(path string) (int64, error) {
	var size int64

	err := filepath.Walk(path, func(_ string, info os.FileInfo, err error) error {
		if err != nil {
			// Propagate the failure; this aborts the walk.
			return err
		}
		if !info.IsDir() {
			size += info.Size()
		}
		return nil
	})
	if err != nil {
		return 0, fmt.Errorf("could not compute index size of %q: %w", path, err)
	}

	return size, nil
}
|
||||
|
||||
27
db/db.go
27
db/db.go
@@ -5,6 +5,9 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/v2"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis/analyzer/keyword"
|
||||
"github.com/blevesearch/bleve/v2/analysis/lang/en"
|
||||
"github.com/blevesearch/bleve/v2/mapping"
|
||||
"github.com/tardisx/linkwallet/entity"
|
||||
bolthold "github.com/timshannon/bolthold"
|
||||
@@ -16,6 +19,7 @@ type DB struct {
|
||||
bleve bleve.Index
|
||||
}
|
||||
|
||||
// Open opens the bookmark boltdb, and the bleve index.
|
||||
func (db *DB) Open(path string) error {
|
||||
// options := bolthold.DefaultOptions
|
||||
// options.Dir = dir
|
||||
@@ -47,20 +51,27 @@ func (db *DB) Open(path string) error {
|
||||
}
|
||||
|
||||
func createIndexMapping() mapping.IndexMapping {
|
||||
|
||||
indexMapping := bleve.NewIndexMapping()
|
||||
|
||||
englishTextFieldMapping := bleve.NewTextFieldMapping()
|
||||
englishTextFieldMapping.Analyzer = en.AnalyzerName
|
||||
|
||||
// a generic reusable mapping for keyword text
|
||||
keywordFieldMapping := bleve.NewTextFieldMapping()
|
||||
keywordFieldMapping.Analyzer = keyword.Name
|
||||
|
||||
pageInfoMapping := bleve.NewDocumentMapping()
|
||||
pageInfoMapping.AddFieldMappingsAt("Title", bleve.NewTextFieldMapping())
|
||||
pageInfoMapping.AddFieldMappingsAt("Title", englishTextFieldMapping)
|
||||
pageInfoMapping.AddFieldMappingsAt("Size", bleve.NewNumericFieldMapping())
|
||||
pageInfoMapping.AddFieldMappingsAt("RawText", bleve.NewTextFieldMapping())
|
||||
pageInfoMapping.AddFieldMappingsAt("RawText", englishTextFieldMapping)
|
||||
|
||||
bookmarkMapping := bleve.NewDocumentMapping()
|
||||
bookmarkMapping.AddFieldMappingsAt("URL", bleve.NewTextFieldMapping())
|
||||
bookmarkMapping.AddFieldMappingsAt("Tags", bleve.NewTextFieldMapping())
|
||||
bookmarkMapping.AddFieldMappingsAt("Tags", keywordFieldMapping)
|
||||
bookmarkMapping.AddSubDocumentMapping("Info", pageInfoMapping)
|
||||
|
||||
indexMapping.AddDocumentMapping("bookmark", bookmarkMapping)
|
||||
|
||||
return indexMapping
|
||||
}
|
||||
|
||||
@@ -111,17 +122,11 @@ func (db *DB) UpdateBookmarkStats() error {
|
||||
}
|
||||
// count bookmarks and words indexed
|
||||
bmI := entity.Bookmark{}
|
||||
wiI := entity.WordIndex{}
|
||||
bookmarkCount, err := db.store.TxCount(txn, &bmI, &bolthold.Query{})
|
||||
if err != nil {
|
||||
txn.Rollback()
|
||||
return fmt.Errorf("could not get bookmark count: %s", err)
|
||||
}
|
||||
indexWordCount, err := db.store.TxCount(txn, &wiI, &bolthold.Query{})
|
||||
if err != nil {
|
||||
txn.Rollback()
|
||||
return fmt.Errorf("could not get index word count: %s", err)
|
||||
}
|
||||
|
||||
// bucket these stats by day
|
||||
now := time.Now().Truncate(time.Hour * 24)
|
||||
@@ -135,7 +140,7 @@ func (db *DB) UpdateBookmarkStats() error {
|
||||
if stats.History == nil {
|
||||
stats.History = make(map[time.Time]entity.BookmarkInfo)
|
||||
}
|
||||
stats.History[now] = entity.BookmarkInfo{Bookmarks: bookmarkCount, IndexedWords: indexWordCount}
|
||||
stats.History[now] = entity.BookmarkInfo{Bookmarks: bookmarkCount}
|
||||
err = db.store.TxUpsert(txn, "stats", &stats)
|
||||
if err != nil {
|
||||
txn.Rollback()
|
||||
|
||||
114
db/index_test.go
114
db/index_test.go
@@ -5,6 +5,10 @@ import (
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/v2"
|
||||
"github.com/blevesearch/bleve/v2/search/query"
|
||||
|
||||
"github.com/tardisx/linkwallet/entity"
|
||||
)
|
||||
@@ -141,3 +145,113 @@ func TestTagIndexing(t *testing.T) {
|
||||
t.Error("did not get one id for sloth")
|
||||
}
|
||||
}
|
||||
|
||||
func testBM() entity.Bookmark {
|
||||
return entity.Bookmark{
|
||||
ID: 1,
|
||||
URL: "https://one.com",
|
||||
Info: entity.PageInfo{
|
||||
Fetched: time.Time{},
|
||||
Title: "one web",
|
||||
Size: 200,
|
||||
StatusCode: 200,
|
||||
RawText: "one web site is great for all humans",
|
||||
},
|
||||
Tags: []string{"hello", "big friends"},
|
||||
PreserveTitle: false,
|
||||
TimestampCreated: time.Time{},
|
||||
TimestampLastScraped: time.Time{},
|
||||
}
|
||||
}
|
||||
|
||||
func TestMappings(t *testing.T) {
|
||||
mapping := createIndexMapping()
|
||||
idx, err := bleve.NewMemOnly(mapping)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
t.FailNow()
|
||||
}
|
||||
|
||||
bm := testBM()
|
||||
err = idx.Index("1", bm)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
type tc struct {
|
||||
query query.Query
|
||||
expHits int
|
||||
}
|
||||
tcs := []tc{
|
||||
{query: bleve.NewMatchQuery("human"), expHits: 1},
|
||||
{query: bleve.NewMatchQuery("humanoid"), expHits: 0},
|
||||
{query: bleve.NewMatchQuery("hello"), expHits: 1},
|
||||
{query: bleve.NewMatchQuery("big"), expHits: 0},
|
||||
{query: bleve.NewMatchQuery("friends"), expHits: 0},
|
||||
{query: bleve.NewMatchQuery("big friend"), expHits: 0},
|
||||
{query: bleve.NewTermQuery("big friends"), expHits: 1},
|
||||
{query: bleve.NewMatchQuery("web great"), expHits: 1},
|
||||
}
|
||||
|
||||
for i := range tcs {
|
||||
q := tcs[i].query
|
||||
|
||||
sr, err := idx.Search(bleve.NewSearchRequest(q))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
} else {
|
||||
if len(sr.Hits) != tcs[i].expHits {
|
||||
t.Errorf("wrong hits - expected %d got %d for %s", tcs[i].expHits, len(sr.Hits), tcs[i].query)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestMappingsDisjunctionQuery(t *testing.T) {
|
||||
mapping := createIndexMapping()
|
||||
idx, err := bleve.NewMemOnly(mapping)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
t.FailNow()
|
||||
}
|
||||
|
||||
bm := testBM()
|
||||
err = idx.Index("1", bm)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
type tc struct {
|
||||
query string
|
||||
expHits int
|
||||
}
|
||||
tcs := []tc{
|
||||
{query: "human", expHits: 1},
|
||||
{query: "humanoid", expHits: 0},
|
||||
{query: "hello", expHits: 1},
|
||||
{query: "big", expHits: 0},
|
||||
{query: "friends", expHits: 0},
|
||||
{query: "big friend", expHits: 0},
|
||||
{query: "big friends", expHits: 1},
|
||||
{query: "web great", expHits: 1},
|
||||
}
|
||||
|
||||
for i := range tcs {
|
||||
q := tcs[i].query
|
||||
req := bleve.NewDisjunctionQuery(
|
||||
bleve.NewMatchQuery(q),
|
||||
bleve.NewTermQuery(q),
|
||||
)
|
||||
|
||||
sr, err := idx.Search(bleve.NewSearchRequest(req))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
} else {
|
||||
if len(sr.Hits) != tcs[i].expHits {
|
||||
t.Errorf("wrong hits - expected %d got %d for %s", tcs[i].expHits, len(sr.Hits), tcs[i].query)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user