Mostly done, first cut

This commit is contained in:
Justin Hawkins 2025-05-01 23:39:51 +09:30
parent badbe5e92f
commit 58b6692d1b
11 changed files with 248 additions and 156 deletions

View File

@ -3,9 +3,11 @@ package db
import (
"errors"
"fmt"
"html/template"
"io"
"log"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
@ -25,9 +27,9 @@ type BookmarkManager struct {
}
// SearchOptions describes a single request to BookmarkManager.Search.
type SearchOptions struct {
	// Query is the user-supplied search text. Search panics if Query is
	// non-empty while All is set.
	Query string
	// Tags and Sort are retained so that existing callers keep compiling.
	// NOTE(review): Search does not implement tag filtering or sorting;
	// confirm whether these fields can be dropped.
	Tags []string
	Sort string
	// All requests every indexed bookmark instead of matching Query.
	All bool
	// Results caps the number of hits requested from the index when it is
	// greater than zero; otherwise the search request's default size is used.
	Results int
}
func NewBookmarkManager(db *DB) *BookmarkManager {
@ -70,15 +72,15 @@ func (m *BookmarkManager) DeleteBookmark(bm *entity.Bookmark) error {
}
// ListBookmarks returns every bookmark held in the bolthold store.
//
// A failed store lookup is reported through the error return instead of a
// panic, so a storage problem cannot take down the calling request handler.
// The returned slice is nil when an error is returned.
func (m *BookmarkManager) ListBookmarks() ([]entity.Bookmark, error) {
	bookmarks := make([]entity.Bookmark, 0)
	// An empty query matches every stored record of the target type.
	if err := m.db.store.Find(&bookmarks, &bolthold.Query{}); err != nil {
		return nil, fmt.Errorf("could not list bookmarks: %w", err)
	}
	log.Printf("found %d bookmarks", len(bookmarks))
	return bookmarks, nil
}
// func (m *BookmarkManager) ListBookmarks() ([]entity.Bookmark, error) {
// bookmarks := make([]entity.Bookmark, 0)
// err := m.db.store.Find(&bookmarks, &bolthold.Query{})
// if err != nil {
// panic(err)
// }
// log.Printf("found %d bookmarks", len(bookmarks))
// return bookmarks, nil
// }
// ExportBookmarks exports all bookmarks to an io.Writer
func (m *BookmarkManager) ExportBookmarks(w io.Writer) error {
@ -111,30 +113,47 @@ func (m *BookmarkManager) LoadBookmarkByID(id uint64) entity.Bookmark {
return ret
}
func (m *BookmarkManager) Search(opts SearchOptions) ([]entity.Bookmark, error) {
found := []entity.Bookmark{}
log.Printf("search with query: %s", opts.Query)
if opts.Sort != "" {
panic("unimplemented sort")
}
if len(opts.Tags) > 0 {
panic("unimplemented tags")
func (m *BookmarkManager) Search(opts SearchOptions) ([]entity.BookmarkSearchResult, error) {
found := []entity.BookmarkSearchResult{}
if opts.All && opts.Query != "" {
panic("can't fetch all with query")
}
sr, err := m.db.bleve.Search(bleve.NewSearchRequest(
query.NewQueryStringQuery(opts.Query)))
var q query.Query
if opts.All {
q = bleve.NewMatchAllQuery()
} else {
q = bleve.NewDisjunctionQuery(
bleve.NewMatchQuery(opts.Query),
bleve.NewTermQuery(opts.Query),
)
}
req := bleve.NewSearchRequest(q)
if opts.Results > 0 {
req.Size = opts.Results
}
req.Highlight = bleve.NewHighlightWithStyle("html")
sr, err := m.db.bleve.Search(req)
if err != nil {
panic(err)
}
log.Printf("total: %d", sr.Total)
log.Printf("string: %s", sr.String())
// log.Printf("%#v", m.db.bleve.StatsMap())
if sr.Total > 0 {
for _, dm := range sr.Hits {
log.Printf("hit: %s => %s", dm.ID, dm.String())
id, _ := strconv.ParseUint(dm.ID, 10, 64)
found = append(found, m.LoadBookmarkByID(id))
bm := m.LoadBookmarkByID(id)
bsr := entity.BookmarkSearchResult{
Bookmark: bm,
Score: dm.Score,
Highlight: template.HTML(strings.Join(dm.Fragments["Info.RawText"], "\n")),
}
found = append(found, bsr)
}
}
@ -255,5 +274,25 @@ func (m *BookmarkManager) Stats() (entity.DBStats, error) {
return stats, fmt.Errorf("could not load db file size: %s", err)
}
stats.FileSize = int(fi.Size())
indexSize, err := getBleveIndexSize(m.db.file + ".bleve")
if err != nil {
return entity.DBStats{}, err
}
stats.IndexSize = int(indexSize)
return stats, nil
}
// getBleveIndexSize adds up the sizes, in bytes, of every non-directory entry
// found beneath path. The walk stops at the first error it meets; that error
// is returned together with whatever had been counted up to that point.
func getBleveIndexSize(path string) (int64, error) {
	var total int64

	visit := func(_ string, fi os.FileInfo, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		if fi.IsDir() {
			// directories contribute nothing themselves, only their contents
			return nil
		}
		total += fi.Size()
		return nil
	}

	walkErr := filepath.Walk(path, visit)
	return total, walkErr
}

View File

@ -5,6 +5,9 @@ import (
"time"
"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/analysis/analyzer/keyword"
"github.com/blevesearch/bleve/v2/analysis/lang/en"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/tardisx/linkwallet/entity"
bolthold "github.com/timshannon/bolthold"
@ -16,6 +19,7 @@ type DB struct {
bleve bleve.Index
}
// Open opens the bookmark boltdb, and the bleve index.
func (db *DB) Open(path string) error {
// options := bolthold.DefaultOptions
// options.Dir = dir
@ -47,20 +51,27 @@ func (db *DB) Open(path string) error {
}
// createIndexMapping builds the bleve index mapping used for bookmarks.
//
// Each field is mapped exactly once: page titles and raw page text use the
// English analyzer, tags use the keyword analyzer so that a tag is only
// matched as a whole, and the URL keeps the default text mapping. Registering
// a second, default-analyzed mapping on the same field would also index the
// individual words of a tag, which the mapping tests expect not to match.
func createIndexMapping() mapping.IndexMapping {
	indexMapping := bleve.NewIndexMapping()

	// a reusable mapping for free text analysed as English
	englishTextFieldMapping := bleve.NewTextFieldMapping()
	englishTextFieldMapping.Analyzer = en.AnalyzerName

	// a generic reusable mapping for keyword text
	keywordFieldMapping := bleve.NewTextFieldMapping()
	keywordFieldMapping.Analyzer = keyword.Name

	pageInfoMapping := bleve.NewDocumentMapping()
	pageInfoMapping.AddFieldMappingsAt("Title", englishTextFieldMapping)
	pageInfoMapping.AddFieldMappingsAt("Size", bleve.NewNumericFieldMapping())
	pageInfoMapping.AddFieldMappingsAt("RawText", englishTextFieldMapping)

	bookmarkMapping := bleve.NewDocumentMapping()
	bookmarkMapping.AddFieldMappingsAt("URL", bleve.NewTextFieldMapping())
	bookmarkMapping.AddFieldMappingsAt("Tags", keywordFieldMapping)
	// page details live in the bookmark's Info sub-document
	bookmarkMapping.AddSubDocumentMapping("Info", pageInfoMapping)

	indexMapping.AddDocumentMapping("bookmark", bookmarkMapping)
	return indexMapping
}
@ -111,17 +122,11 @@ func (db *DB) UpdateBookmarkStats() error {
}
// count bookmarks and words indexed
bmI := entity.Bookmark{}
wiI := entity.WordIndex{}
bookmarkCount, err := db.store.TxCount(txn, &bmI, &bolthold.Query{})
if err != nil {
txn.Rollback()
return fmt.Errorf("could not get bookmark count: %s", err)
}
indexWordCount, err := db.store.TxCount(txn, &wiI, &bolthold.Query{})
if err != nil {
txn.Rollback()
return fmt.Errorf("could not get index word count: %s", err)
}
// bucket these stats by day
now := time.Now().Truncate(time.Hour * 24)
@ -135,7 +140,7 @@ func (db *DB) UpdateBookmarkStats() error {
if stats.History == nil {
stats.History = make(map[time.Time]entity.BookmarkInfo)
}
stats.History[now] = entity.BookmarkInfo{Bookmarks: bookmarkCount, IndexedWords: indexWordCount}
stats.History[now] = entity.BookmarkInfo{Bookmarks: bookmarkCount}
err = db.store.TxUpsert(txn, "stats", &stats)
if err != nil {
txn.Rollback()

View File

@ -5,6 +5,10 @@ import (
"net/http/httptest"
"os"
"testing"
"time"
"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/search/query"
"github.com/tardisx/linkwallet/entity"
)
@ -141,3 +145,113 @@ func TestTagIndexing(t *testing.T) {
t.Error("did not get one id for sloth")
}
}
// testBM builds the fixture bookmark used by the mapping tests: ID 1 with a
// short title and body text, plus one single-word and one two-word tag. Every
// field not set here (timestamps, PreserveTitle) is left at its zero value.
func testBM() entity.Bookmark {
	page := entity.PageInfo{
		Title:      "one web",
		Size:       200,
		StatusCode: 200,
		RawText:    "one web site is great for all humans",
	}

	fixture := entity.Bookmark{
		ID:   1,
		URL:  "https://one.com",
		Info: page,
		Tags: []string{"hello", "big friends"},
	}
	return fixture
}
// TestMappings indexes the fixture bookmark into an in-memory index built
// from createIndexMapping and checks how many hits individual match and term
// queries produce.
func TestMappings(t *testing.T) {
	idx, err := bleve.NewMemOnly(createIndexMapping())
	if err != nil {
		t.Fatal(err)
	}
	defer idx.Close()

	// Fail this test only; a panic here would abort the whole test binary.
	if err := idx.Index("1", testBM()); err != nil {
		t.Fatalf("could not index fixture bookmark: %s", err)
	}

	tcs := []struct {
		query   query.Query
		expHits int
	}{
		// body text contains "humans"
		{query: bleve.NewMatchQuery("human"), expHits: 1},
		{query: bleve.NewMatchQuery("humanoid"), expHits: 0},
		// single-word tag
		{query: bleve.NewMatchQuery("hello"), expHits: 1},
		// the two-word tag must only be found as a whole
		{query: bleve.NewMatchQuery("big"), expHits: 0},
		{query: bleve.NewMatchQuery("friends"), expHits: 0},
		{query: bleve.NewMatchQuery("big friend"), expHits: 0},
		{query: bleve.NewTermQuery("big friends"), expHits: 1},
		// several words from the body text
		{query: bleve.NewMatchQuery("web great"), expHits: 1},
	}

	for _, tc := range tcs {
		sr, err := idx.Search(bleve.NewSearchRequest(tc.query))
		if err != nil {
			t.Error(err)
			continue
		}
		if len(sr.Hits) != tc.expHits {
			// %+v: the query is a struct, not a string
			t.Errorf("wrong hits - expected %d got %d for %+v", tc.expHits, len(sr.Hits), tc.query)
		}
	}
}
// TestMappingsDisjunctionQuery checks the query shape that Search builds for
// user input: a disjunction of a match query and a term query over the same
// text, run against an in-memory index holding the fixture bookmark.
func TestMappingsDisjunctionQuery(t *testing.T) {
	idx, err := bleve.NewMemOnly(createIndexMapping())
	if err != nil {
		t.Fatal(err)
	}
	defer idx.Close()

	// Fail this test only; a panic here would abort the whole test binary.
	if err := idx.Index("1", testBM()); err != nil {
		t.Fatalf("could not index fixture bookmark: %s", err)
	}

	tcs := []struct {
		query   string
		expHits int
	}{
		{query: "human", expHits: 1},
		{query: "humanoid", expHits: 0},
		{query: "hello", expHits: 1},
		// parts of the two-word tag must not match on their own
		{query: "big", expHits: 0},
		{query: "friends", expHits: 0},
		{query: "big friend", expHits: 0},
		// the term half of the disjunction finds the whole tag
		{query: "big friends", expHits: 1},
		{query: "web great", expHits: 1},
	}

	for _, tc := range tcs {
		q := bleve.NewDisjunctionQuery(
			bleve.NewMatchQuery(tc.query),
			bleve.NewTermQuery(tc.query),
		)
		sr, err := idx.Search(bleve.NewSearchRequest(q))
		if err != nil {
			t.Error(err)
			continue
		}
		if len(sr.Hits) != tc.expHits {
			t.Errorf("wrong hits - expected %d got %d for %s", tc.expHits, len(sr.Hits), tc.query)
		}
	}
}

View File

@ -1,6 +1,9 @@
package entity
import "time"
import (
"html/template"
"time"
)
type Bookmark struct {
ID uint64 `boltholdKey:"ID"`
@ -12,6 +15,10 @@ type Bookmark struct {
TimestampLastScraped time.Time
}
// Type returns the fixed document type name "bookmark", the same name under
// which createIndexMapping registers the bookmark document mapping.
// NOTE(review): presumably this is what lets bleve select that mapping when a
// Bookmark is indexed — confirm against bleve's Classifier interface.
func (bm Bookmark) Type() string {
return "bookmark"
}
type PageInfo struct {
Fetched time.Time
Title string
@ -19,3 +26,13 @@ type PageInfo struct {
StatusCode int
RawText string
}
// Type returns the fixed document type name "info".
// NOTE(review): no document mapping named "info" is registered in
// createIndexMapping (page info is mapped as the "Info" sub-document of a
// bookmark) — confirm whether this method is actually consulted.
func (pi PageInfo) Type() string {
return "info"
}
// BookmarkSearchResult pairs a bookmark with the details of the search hit
// that produced it.
type BookmarkSearchResult struct {
// Bookmark is the stored bookmark loaded for the hit's document ID.
Bookmark Bookmark
// Score is the score reported by the search index for this hit.
Score float64
// Highlight holds the hit's highlighted "Info.RawText" fragments joined with
// newlines. It is typed template.HTML, so templates emit it unescaped.
// NOTE(review): this relies on the highlighter escaping the page text — confirm.
Highlight template.HTML
}

View File

@ -1,39 +1 @@
package entity
// WordIndex associates a single word with a collection of uint64 keys.
// NOTE(review): the keys are presumably bookmark IDs — confirm against the
// code that populates this type.
type WordIndex struct {
// Word carries the bolthold "index" struct tag.
Word string `bolthold:"index"`
// Bitmap roaring.Bitmap
// Bitmap is a plain map standing in for the roaring bitmap noted above;
// presumably a key is a member when its value is true — TODO confirm.
Bitmap map[uint64]bool
}
// func (wi WordIndex) GobEncode() ([]byte, error) {
// bmBuf := new(bytes.Buffer)
// wi.Bitmap.WriteTo(bmBuf) // we omit error handling
// wordBytes := []byte(wi.Word)
// serialised := make([]byte, 4, 4)
// binary.BigEndian.PutUint32(serialised, uint32(len(wordBytes)))
// serialised = append(serialised, wordBytes...)
// serialised = append(serialised, bmBuf.Bytes()...)
// // log.Printf("serialised: %v", serialised)
// // log.Printf("serialised to %d bytes for word %w\n%#v", len(serialised), wi.Word, serialised)
// return serialised, nil
// }
// func (wi *WordIndex) GobDecode(b []byte) error {
// size := binary.BigEndian.Uint32(b[0:4])
// wi.Word = string(b[4 : size+4])
// // log.Printf("word is %s size was %d\n%v", wi.Word, size, b)
// bmBuf := bytes.NewReader(b[size+4:])
// wi.Bitmap = *roaring.New()
// _, err := wi.Bitmap.ReadFrom(bmBuf)
// // log.Printf("N: %d, err: %s", n, err)
// return err
// }

View File

@ -7,14 +7,14 @@ import (
)
// DBStats collects the figures shown on the info page: per-day bookmark
// history plus the current on-disk sizes and the search counter.
type DBStats struct {
	// History maps a day bucket (a time truncated to 24 hours) to the
	// bookmark figures recorded for that day.
	History map[time.Time]BookmarkInfo
	// FileSize is the size of the bookmark database file in bytes.
	FileSize int
	// IndexSize is the combined size in bytes of the files beneath the bleve
	// index directory.
	IndexSize int
	// Searches is the total number of searches, as displayed on the info page.
	Searches int
}
// BookmarkInfo is one history entry in DBStats: the figures recorded for a
// single day bucket.
type BookmarkInfo struct {
	// Bookmarks is the number of bookmarks counted in the store.
	Bookmarks int
	// IndexedWords is kept so existing readers of this struct keep compiling.
	// NOTE(review): UpdateBookmarkStats has a variant that no longer sets this
	// field; confirm whether it can be removed.
	IndexedWords int
}
func (stats DBStats) String() string {
@ -29,7 +29,7 @@ func (stats DBStats) String() string {
sort.Slice(dates, func(i, j int) bool { return dates[i].Before(dates[j]) })
for _, k := range dates {
out += fmt.Sprintf("%s - %d bookmarks, %d words indexed\n", k, stats.History[k].Bookmarks, stats.History[k].IndexedWords)
out += fmt.Sprintf("%s - %d bookmarks\n", k, stats.History[k].Bookmarks)
}
return out
}

View File

@ -4,15 +4,14 @@
<h5>System information</h5>
<table>
<tr><th>Memory in use</th><td>{{ meminfo }}</td></tr>
<tr><th>Database disk size</th><td>{{ niceSizeMB .stats.FileSize }}MB</td></tr>
<tr><th>Bookmarks DB size</th><td>{{ niceSizeMB .stats.FileSize }}MB</td></tr>
<tr><th>Bookmarks index size</th><td>{{ niceSizeMB .stats.IndexSize }}MB</td></tr>
<tr><th>Bookmarks</th><td>{{ .stats.MostRecentBookmarkInfo.Bookmarks }}</td></tr>
<tr><th>Words in index</th><td>{{ .stats.MostRecentBookmarkInfo.IndexedWords }}</td></tr>
<tr><th>Total searches</th><td>{{ .stats.Searches }}</td></tr>
</table>
<h5>Database information</h5>
<img src="/graph/bookmarks">
<img src="/graph/indexed_words">
</div>
<div class="large-6 medium-12 cell">

View File

@ -2,29 +2,29 @@
<table id="manage-results">
<tr>
<th>&nbsp;</th>
{{ template "manage_results_column_header.html" .column.title }}
<th>title</th>
<th>tags</th>
{{ template "manage_results_column_header.html" .column.created }}
{{ template "manage_results_column_header.html" .column.scraped }}
<th>created</th>
<th>scraped</th>
</tr>
{{ range .bookmarks }}
{{ range .results }}
<tr>
<th><a class="button" href="/edit/{{ .ID }}">edit</a></th>
<th><a class="button" href="/edit/{{ .Bookmark.ID }}">edit</a></th>
<td>
<a href="{{ .URL }}">{{ .Info.Title }}</a>
<a href="{{ .Bookmark.URL }}">{{ .Bookmark.Info.Title }}</a>
<br>
<a href="{{ .URL }}">{{ niceURL .URL }}</a>
<a href="{{ .Bookmark.URL }}">{{ niceURL .Bookmark.URL }}</a>
</td>
<td>
{{ range .Tags }}
{{ range .Bookmark.Tags }}
<span class="label primary">{{ . }}</span>
{{ end }}
</td>
<td class="show-for-large">{{ (nicetime .TimestampCreated).HumanDuration }} ago</td>
<td class="show-for-large">{{ (nicetime .TimestampLastScraped).HumanDuration }} ago</td>
<td class="show-for-large">{{ (nicetime .Bookmark.TimestampCreated).HumanDuration }} ago</td>
<td class="show-for-large">{{ (nicetime .Bookmark.TimestampLastScraped).HumanDuration }} ago</td>
<td>
<a class="button" hx-swap="outerHTML" hx-post="/scrape/{{ .ID }}">scrape</a>
<a class="button" hx-swap="outerHTML" hx-post="/scrape/{{ .Bookmark.ID }}">scrape</a>
</td>
</tr>
{{ end }}

View File

@ -1,3 +0,0 @@
<th class="{{ .Class }}" hx-post="/manage/results?sort={{ .URLString }}" hx-target="#manage-results">{{ .Name }}&nbsp;{{ .TitleArrow }}
</th>

View File

@ -1,5 +1,8 @@
<ul>
{{ range .results }}
<li><a href="{{ .URL }}">{{ .Info.Title }}</a> - {{ .URL }}</li>
<li>
<a href="{{ .Bookmark.URL }}">{{ .Bookmark.Info.Title }}</a><br>
{{ .Highlight }}
</li>
{{ end }}
</ul>

View File

@ -50,26 +50,9 @@ type Server struct {
}
// ColumnInfo describes one column header of the manage-results table.
type ColumnInfo struct {
	// Name is the text shown in the header.
	Name string
	// Param is the column's name as used in the "sort" request parameter.
	Param string
	// Sorted is "asc" or "desc" when the results are currently ordered by
	// this column, and empty otherwise.
	Sorted string
	// Class is the CSS class applied to the header cell.
	Class string
}

// URLString returns the sort parameter a click on this header should request:
// the column's Param, prefixed with "-" when the column is currently sorted
// ascending so that the next click flips the order.
func (c ColumnInfo) URLString() string {
	if c.Sorted == "asc" {
		return "-" + c.Param
	}
	return c.Param
}

// TitleArrow returns the arrow shown next to the header text: "↑" for an
// ascending sort, "↓" for a descending one, and nothing when the column is
// not the active sort column.
func (c ColumnInfo) TitleArrow() string {
	switch c.Sorted {
	case "asc":
		return "↑"
	case "desc":
		return "↓"
	default:
		return ""
	}
}
// Create creates a new web server instance and sets up routing.
@ -126,9 +109,8 @@ func Create(bmm *db.BookmarkManager, cmm *db.ConfigManager) *Server {
})
r.GET("/manage", func(c *gin.Context) {
allBookmarks, _ := bmm.ListBookmarks()
meta := gin.H{"page": "manage", "config": config, "bookmarks": allBookmarks}
results, _ := bmm.Search(db.SearchOptions{All: true})
meta := gin.H{"page": "manage", "config": config, "results": results}
c.HTML(http.StatusOK,
"_layout.html", meta,
)
@ -136,37 +118,18 @@ func Create(bmm *db.BookmarkManager, cmm *db.ConfigManager) *Server {
r.POST("/manage/results", func(c *gin.Context) {
query := c.PostForm("query")
sort := c.Query("sort")
bookmarks := []entity.Bookmark{}
results := make([]entity.BookmarkSearchResult, 0)
if query == "" {
bookmarks, _ = bmm.ListBookmarks()
results, _ = bmm.Search(db.SearchOptions{All: true, Results: 100})
} else {
bookmarks, _ = bmm.Search(db.SearchOptions{Query: query, Sort: sort})
results, _ = bmm.Search(db.SearchOptions{Query: query})
}
meta := gin.H{"config": config, "bookmarks": bookmarks}
meta := gin.H{"config": config, "results": results}
colTitle := &ColumnInfo{Name: "Title/URL", Param: "title"}
colCreated := &ColumnInfo{Name: "Created", Param: "created", Class: "show-for-large"}
colScraped := &ColumnInfo{Name: "Scraped", Param: "scraped", Class: "show-for-large"}
if sort == "title" {
colTitle.Sorted = "asc"
}
if sort == "-title" {
colTitle.Sorted = "desc"
}
if sort == "scraped" {
colScraped.Sorted = "asc"
}
if sort == "-scraped" {
colScraped.Sorted = "desc"
}
if sort == "created" {
colCreated.Sorted = "asc"
}
if sort == "-created" {
colCreated.Sorted = "desc"
}
cols := gin.H{
"title": colTitle,
@ -175,9 +138,7 @@ func Create(bmm *db.BookmarkManager, cmm *db.ConfigManager) *Server {
}
meta["column"] = cols
c.HTML(http.StatusOK,
"manage_results.html", meta,
)
c.HTML(http.StatusOK, "manage_results.html", meta)
})
@ -466,10 +427,7 @@ func Create(bmm *db.BookmarkManager, cmm *db.ConfigManager) *Server {
func plotPoints(sortedKeys []time.Time, dbStats entity.DBStats, p *plot.Plot, k string) {
if k == "indexed_words" {
p.Title.Text = "Indexed words over time"
p.Y.Label.Text = "Words indexed"
} else if k == "bookmarks" {
if k == "bookmarks" {
p.Title.Text = "Bookmarks over time"
p.Y.Label.Text = "Bookmarks"
} else {
@ -480,9 +438,7 @@ func plotPoints(sortedKeys []time.Time, dbStats entity.DBStats, p *plot.Plot, k
pts := make(plotter.XYs, len(sortedKeys))
for i := range sortedKeys {
pts[i].X = float64(sortedKeys[i].Unix())
if k == "indexed_words" {
pts[i].Y = float64(dbStats.History[sortedKeys[i]].IndexedWords)
} else if k == "bookmarks" {
if k == "bookmarks" {
pts[i].Y = float64(dbStats.History[sortedKeys[i]].Bookmarks)
} else {
panic("bad key")