diff --git a/content/content.go b/content/content.go index e92a12a..2c07b96 100644 --- a/content/content.go +++ b/content/content.go @@ -53,13 +53,15 @@ func FetchPageInfo(bm entity.Bookmark) entity.PageInfo { func Words(bm *entity.Bookmark) []string { words := []string{} - words = append(words, StringToSearchWords(bm.Info.RawText)...) - words = append(words, StringToSearchWords(bm.Info.Title)...) - words = append(words, StringToSearchWords(bm.URL)...) + words = append(words, StringToStemmedSearchWords(bm.Info.RawText)...) + words = append(words, StringToStemmedSearchWords(bm.Info.Title)...) + words = append(words, StringToStemmedSearchWords(bm.URL)...) return words } -func StringToSearchWords(s string) []string { +// StringToStemmedSearchWords returns a list of stemmed words with stop words +// removed. +func StringToStemmedSearchWords(s string) []string { words := []string{} words = append(words, stemmerFilter(stopwordFilter(tokenize(s)))...) diff --git a/content/content_test.go b/content/content_test.go index 6502080..4ed4c13 100644 --- a/content/content_test.go +++ b/content/content_test.go @@ -68,7 +68,24 @@ func TestWords(t *testing.T) { words[6] != "dog" { t.Error("incorrect words returned") } + } +} +func TestStemmer(t *testing.T) { + s := `quick quick fox 😂 smile http://google.com` + words1 := StringToStemmedSearchWords(s) + t.Log(words1) + if len(words1) != 7 { + t.Error("wrong number of words") + } + if words1[0] != "quick" || + words1[1] != "quick" || + words1[2] != "fox" || + words1[3] != "smile" || + words1[4] != "http" || + words1[5] != "googl" || + words1[6] != "com" { + t.Error("bad words") } } diff --git a/db/bookmarks.go b/db/bookmarks.go index c1910a8..b3b648e 100644 --- a/db/bookmarks.go +++ b/db/bookmarks.go @@ -113,7 +113,7 @@ func (m *BookmarkManager) Search(opts SearchOptions) ([]entity.Bookmark, error) // first get a list of all the ids that match our query idsMatchingQuery := make([]uint64, 0, 0) counts := make(map[uint64]uint8) - words := content.StringToSearchWords(opts.Query) + words := content.StringToStemmedSearchWords(opts.Query) for _, word := range words { var wi *entity.WordIndex diff --git a/db/index.go b/db/index.go index 540d605..7b85270 100644 --- a/db/index.go +++ b/db/index.go @@ -69,7 +69,6 @@ func (db *DB) UpdateIndexForWordsByID(words []string, id uint64) { func (db *DB) DumpIndex() { - // delete this id from all indices err := db.store.ForEach(&bolthold.Query{}, func(wi *entity.WordIndex) error { log.Printf("%10s: %v", wi.Word, wi.Bitmap) return nil