Initial checkin
This commit is contained in:
208
db/bookmarks.go
Normal file
208
db/bookmarks.go
Normal file
@@ -0,0 +1,208 @@
|
||||
package db
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/tardisx/linkwallet/content"
|
||||
"github.com/tardisx/linkwallet/entity"
|
||||
|
||||
"github.com/timshannon/badgerhold/v4"
|
||||
)
|
||||
|
||||
type BookmarkManager struct {
|
||||
db *DB
|
||||
scrapeQueue chan *entity.Bookmark
|
||||
}
|
||||
|
||||
func NewBookmarkManager(db *DB) *BookmarkManager {
|
||||
return &BookmarkManager{db: db, scrapeQueue: make(chan *entity.Bookmark)}
|
||||
}
|
||||
|
||||
// AddBookmark adds a bookmark to the database. It returns an error
|
||||
// if this bookmark already exists (based on URL match).
|
||||
// The entity.Bookmark ID field will be updated.
|
||||
func (m *BookmarkManager) AddBookmark(bm *entity.Bookmark) error {
|
||||
existing := entity.Bookmark{}
|
||||
err := m.db.store.FindOne(&existing, badgerhold.Where("URL").Eq(bm.URL))
|
||||
if err != badgerhold.ErrNotFound {
|
||||
return fmt.Errorf("bookmark already exists")
|
||||
}
|
||||
bm.TimestampCreated = time.Now()
|
||||
err = m.db.store.Insert(badgerhold.NextSequence(), bm)
|
||||
if err != nil {
|
||||
return fmt.Errorf("addBookmark returned: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ListBookmarks returns all bookmarks.
|
||||
func (m *BookmarkManager) ListBookmarks() ([]entity.Bookmark, error) {
|
||||
bookmarks := make([]entity.Bookmark, 0, 0)
|
||||
err := m.db.store.Find(&bookmarks, &badgerhold.Query{})
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return bookmarks, nil
|
||||
}
|
||||
|
||||
func (m *BookmarkManager) SaveBookmark(bm *entity.Bookmark) error {
|
||||
err := m.db.store.Update(bm.ID, &bm)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *BookmarkManager) LoadBookmarkByID(id uint64) entity.Bookmark {
|
||||
// log.Printf("loading %v", ids)
|
||||
ret := entity.Bookmark{}
|
||||
log.Printf("loading id %d", id)
|
||||
err := m.db.store.Get(id, &ret)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
func (m *BookmarkManager) LoadBookmarksByIDs(ids []uint64) []entity.Bookmark {
|
||||
// log.Printf("loading %v", ids)
|
||||
ret := make([]entity.Bookmark, 0, 0)
|
||||
|
||||
s := make([]interface{}, len(ids))
|
||||
for i, v := range ids {
|
||||
s[i] = v
|
||||
}
|
||||
|
||||
err := m.db.store.Find(&ret, badgerhold.Where("ID").In(s...))
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
func (m *BookmarkManager) Search(query string) ([]entity.Bookmark, error) {
|
||||
rets := make([]uint64, 0, 0)
|
||||
|
||||
counts := make(map[uint64]uint8)
|
||||
|
||||
words := content.StringToSearchWords(query)
|
||||
|
||||
for _, word := range words {
|
||||
var wi *entity.WordIndex
|
||||
err := m.db.store.Get("word_index_"+word, &wi)
|
||||
if err == badgerhold.ErrNotFound {
|
||||
continue
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error retrieving index: %w", err)
|
||||
}
|
||||
for k := range wi.Bitmap {
|
||||
counts[k]++
|
||||
}
|
||||
}
|
||||
|
||||
// log.Printf("counts: %#v", counts)
|
||||
|
||||
for k, v := range counts {
|
||||
if v == uint8(len(words)) {
|
||||
rets = append(rets, k)
|
||||
if len(rets) > 10 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return m.LoadBookmarksByIDs(rets), nil
|
||||
}
|
||||
|
||||
func (m *BookmarkManager) ScrapeAndIndex(bm *entity.Bookmark) error {
|
||||
|
||||
log.Printf("Start scrape for %s", bm.URL)
|
||||
info := content.FetchPageInfo(*bm)
|
||||
bm.Info = info
|
||||
bm.TimestampLastScraped = time.Now()
|
||||
err := m.SaveBookmark(bm)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
words := content.Words(bm)
|
||||
log.Printf("index for %d %s (%d words)", bm.ID, bm.URL, len(words))
|
||||
m.db.UpdateIndexForWordsByID(words, bm.ID)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *BookmarkManager) QueueScrape(bm *entity.Bookmark) {
|
||||
m.scrapeQueue <- bm
|
||||
}
|
||||
|
||||
func (m *BookmarkManager) RunQueue() {
|
||||
type localScrapeQueue struct {
|
||||
queue []*entity.Bookmark
|
||||
mutex sync.Mutex
|
||||
}
|
||||
|
||||
localQueue := localScrapeQueue{queue: make([]*entity.Bookmark, 0)}
|
||||
// accept things off the queue immediately
|
||||
go func() {
|
||||
for {
|
||||
newItem := <-m.scrapeQueue
|
||||
|
||||
newItem.TimestampLastScraped = time.Now()
|
||||
err := m.SaveBookmark(newItem)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
localQueue.mutex.Lock()
|
||||
localQueue.queue = append(localQueue.queue, newItem)
|
||||
localQueue.mutex.Unlock()
|
||||
log.Printf("queue now has %d entries", len(localQueue.queue))
|
||||
}
|
||||
}()
|
||||
|
||||
for {
|
||||
localQueue.mutex.Lock()
|
||||
if len(localQueue.queue) > 0 {
|
||||
processBM := localQueue.queue[0]
|
||||
localQueue.queue = localQueue.queue[1:]
|
||||
localQueue.mutex.Unlock()
|
||||
|
||||
m.ScrapeAndIndex(processBM)
|
||||
|
||||
} else {
|
||||
localQueue.mutex.Unlock()
|
||||
}
|
||||
time.Sleep(time.Second)
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func (m *BookmarkManager) UpdateContent() {
|
||||
ret := make([]entity.Bookmark, 0)
|
||||
for {
|
||||
ret = []entity.Bookmark{}
|
||||
deadline := time.Now().Add(time.Hour * -24 * 7)
|
||||
err := m.db.store.Find(&ret, badgerhold.Where("TimestampLastScraped").Lt(deadline))
|
||||
if err == badgerhold.ErrNotFound {
|
||||
log.Printf("none qualify")
|
||||
time.Sleep(time.Second)
|
||||
continue
|
||||
}
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
for _, bm := range ret {
|
||||
thisBM := bm
|
||||
log.Printf("queueing %d because %s", thisBM.ID, thisBM.TimestampLastScraped)
|
||||
m.QueueScrape(&thisBM)
|
||||
}
|
||||
time.Sleep(time.Second * 5)
|
||||
}
|
||||
}
|
||||
35
db/db.go
Normal file
35
db/db.go
Normal file
@@ -0,0 +1,35 @@
|
||||
package db
|
||||
|
||||
import (
|
||||
"log"
|
||||
|
||||
"github.com/tardisx/linkwallet/entity"
|
||||
|
||||
badgerhold "github.com/timshannon/badgerhold/v4"
|
||||
)
|
||||
|
||||
type DB struct {
|
||||
store *badgerhold.Store
|
||||
}
|
||||
|
||||
func (db *DB) Open(dir string) {
|
||||
options := badgerhold.DefaultOptions
|
||||
options.Dir = dir
|
||||
options.ValueDir = dir
|
||||
store, err := badgerhold.Open(options)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
|
||||
}
|
||||
db.store = store
|
||||
}
|
||||
|
||||
func (db *DB) Close() {
|
||||
db.store.Close()
|
||||
}
|
||||
|
||||
func (db *DB) Dumpy() {
|
||||
res := make([]entity.Bookmark, 0, 0)
|
||||
db.store.Find(&res, &badgerhold.Query{})
|
||||
log.Printf("%v", res)
|
||||
}
|
||||
75
db/index.go
Normal file
75
db/index.go
Normal file
@@ -0,0 +1,75 @@
|
||||
package db
|
||||
|
||||
import (
|
||||
"log"
|
||||
"time"
|
||||
|
||||
"github.com/tardisx/linkwallet/entity"
|
||||
|
||||
badgerhold "github.com/timshannon/badgerhold/v4"
|
||||
)
|
||||
|
||||
func (db *DB) InitIndices() {
|
||||
wi := entity.WordIndex{}
|
||||
db.store.DeleteMatching(wi, &badgerhold.Query{})
|
||||
}
|
||||
|
||||
func (db *DB) UpdateIndexForWordsByID(words []string, id uint64) {
|
||||
// delete this id from all indices
|
||||
txn := db.store.Badger().NewTransaction(true)
|
||||
|
||||
db.store.TxForEach(txn, &badgerhold.Query{}, func(wi *entity.WordIndex) {
|
||||
// log.Printf("considering this one: %s", wi.Word)
|
||||
delete(wi.Bitmap, id)
|
||||
})
|
||||
|
||||
// addiing
|
||||
var find, store time.Duration
|
||||
for i, word := range words {
|
||||
// log.Printf("indexing %s", word)
|
||||
tF := time.Now()
|
||||
thisWI := entity.WordIndex{Word: word}
|
||||
err := db.store.TxGet(txn, "word_index_"+word, &thisWI)
|
||||
// err := db.store.TxFindOne(txn, &thisWI, badgerhold.Where("Word").Eq(word).Index("Word"))
|
||||
if err == badgerhold.ErrNotFound {
|
||||
// create it
|
||||
thisWI.Bitmap = map[uint64]bool{}
|
||||
} else if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
findT := time.Since(tF)
|
||||
|
||||
tS := time.Now()
|
||||
thisWI.Bitmap[id] = true
|
||||
// log.Printf("BM: %v", thisWI.Bitmap)
|
||||
err = db.store.TxUpsert(txn, "word_index_"+word, thisWI)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
findS := time.Since(tS)
|
||||
find += findT
|
||||
store += findS
|
||||
|
||||
if i > 0 && i%100 == 0 {
|
||||
txn.Commit()
|
||||
txn = db.store.Badger().NewTransaction(true)
|
||||
}
|
||||
|
||||
}
|
||||
//log.Printf("find %s store %s", find, store)
|
||||
|
||||
txn.Commit()
|
||||
}
|
||||
|
||||
func (db *DB) DumpIndex() {
|
||||
|
||||
// delete this id from all indices
|
||||
err := db.store.ForEach(&badgerhold.Query{}, func(wi *entity.WordIndex) error {
|
||||
log.Printf("%10s: %v", wi.Word, wi.Bitmap)
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user