Add some tests in preparation for new ways of indexing words
This commit is contained in:
parent
1ca348ab45
commit
76c5282695
@ -53,13 +53,15 @@ func FetchPageInfo(bm entity.Bookmark) entity.PageInfo {
|
|||||||
// Words returns the search words for a bookmark. It appends, in order, the
// words derived from the page text (bm.Info.RawText), the title
// (bm.Info.Title) and the URL (bm.URL), and returns the combined slice.
// In this side-by-side rendering every line appears twice: the first copy
// calls StringToSearchWords, the second calls StringToStemmedSearchWords.
// NOTE(review): presumably the first copy is the pre-commit version and the
// second the post-commit version — confirm against the repository.
func Words(bm *entity.Bookmark) []string {
|
func Words(bm *entity.Bookmark) []string {
|
||||||
words := []string{}
|
words := []string{}
|
||||||
|
|
||||||
words = append(words, StringToSearchWords(bm.Info.RawText)...)
|
words = append(words, StringToStemmedSearchWords(bm.Info.RawText)...)
|
||||||
words = append(words, StringToSearchWords(bm.Info.Title)...)
|
words = append(words, StringToStemmedSearchWords(bm.Info.Title)...)
|
||||||
words = append(words, StringToSearchWords(bm.URL)...)
|
words = append(words, StringToStemmedSearchWords(bm.URL)...)
|
||||||
return words
|
return words
|
||||||
}
|
}
|
||||||
|
|
||||||
func StringToSearchWords(s string) []string {
|
// StringToStemmedSearchWords returns a list of stemmed words with stop words
|
||||||
|
// removed.
|
||||||
|
func StringToStemmedSearchWords(s string) []string {
|
||||||
words := []string{}
|
words := []string{}
|
||||||
|
|
||||||
words = append(words, stemmerFilter(stopwordFilter(tokenize(s)))...)
|
words = append(words, stemmerFilter(stopwordFilter(tokenize(s)))...)
|
||||||
|
@ -68,7 +68,24 @@ func TestWords(t *testing.T) {
|
|||||||
words[6] != "dog" {
|
words[6] != "dog" {
|
||||||
t.Error("incorrect words returned")
|
t.Error("incorrect words returned")
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestStemmer checks that StringToStemmedSearchWords turns the sample string
// into exactly seven words in input order. The expected list contains no entry
// for the emoji and expects "google" to come back as "googl".
func TestStemmer(t *testing.T) {
|
||||||
|
s := `quick quick fox 😂 smile http://google.com`
|
||||||
|
words1 := StringToStemmedSearchWords(s)
|
||||||
|
t.Log(words1)
|
||||||
|
if len(words1) != 7 {
|
||||||
|
t.Fatalf("wrong number of words: got %d, want 7", len(words1)) // stop here: the index checks below would panic on a shorter slice
|
||||||
|
}
|
||||||
|
if words1[0] != "quick" ||
|
||||||
|
words1[1] != "quick" ||
|
||||||
|
words1[2] != "fox" ||
|
||||||
|
words1[3] != "smile" ||
|
||||||
|
words1[4] != "http" ||
|
||||||
|
words1[5] != "googl" ||
|
||||||
|
words1[6] != "com" {
|
||||||
|
t.Error("bad words")
|
||||||
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -113,7 +113,7 @@ func (m *BookmarkManager) Search(opts SearchOptions) ([]entity.Bookmark, error)
|
|||||||
// first get a list of all the ids that match our query
|
// first get a list of all the ids that match our query
|
||||||
idsMatchingQuery := make([]uint64, 0, 0)
|
idsMatchingQuery := make([]uint64, 0, 0)
|
||||||
counts := make(map[uint64]uint8)
|
counts := make(map[uint64]uint8)
|
||||||
words := content.StringToSearchWords(opts.Query)
|
words := content.StringToStemmedSearchWords(opts.Query)
|
||||||
|
|
||||||
for _, word := range words {
|
for _, word := range words {
|
||||||
var wi *entity.WordIndex
|
var wi *entity.WordIndex
|
||||||
|
@ -69,7 +69,6 @@ func (db *DB) UpdateIndexForWordsByID(words []string, id uint64) {
|
|||||||
|
|
||||||
func (db *DB) DumpIndex() {
|
func (db *DB) DumpIndex() {
|
||||||
|
|
||||||
// delete this id from all indices
|
|
||||||
err := db.store.ForEach(&bolthold.Query{}, func(wi *entity.WordIndex) error {
|
err := db.store.ForEach(&bolthold.Query{}, func(wi *entity.WordIndex) error {
|
||||||
log.Printf("%10s: %v", wi.Word, wi.Bitmap)
|
log.Printf("%10s: %v", wi.Word, wi.Bitmap)
|
||||||
return nil
|
return nil
|
||||||
|
Loading…
x
Reference in New Issue
Block a user