Add some tests in preparation for new ways of indexing words

This commit is contained in:
2023-05-17 20:09:28 +09:30
parent 1ca348ab45
commit 76c5282695
4 changed files with 24 additions and 6 deletions

View File

@@ -53,13 +53,15 @@ func FetchPageInfo(bm entity.Bookmark) entity.PageInfo {
// Words builds the list of search words for a bookmark by appending the words
// derived from bm.Info.RawText, bm.Info.Title and bm.URL, in that order.
//
// NOTE(review): this listing looks like a rendered diff with its +/- markers
// stripped, so the StringToSearchWords calls are presumably the removed side
// and the StringToStemmedSearchWords calls the added side — confirm against
// the repository before relying on both sets being appended.
func Words(bm *entity.Bookmark) []string {
words := []string{}
words = append(words, StringToSearchWords(bm.Info.RawText)...)
words = append(words, StringToSearchWords(bm.Info.Title)...)
words = append(words, StringToSearchWords(bm.URL)...)
words = append(words, StringToStemmedSearchWords(bm.Info.RawText)...)
words = append(words, StringToStemmedSearchWords(bm.Info.Title)...)
words = append(words, StringToStemmedSearchWords(bm.URL)...)
return words
}
func StringToSearchWords(s string) []string {
// StringToStemmedSearchWords returns a list of stemmed words with stop words
// removed.
func StringToStemmedSearchWords(s string) []string {
words := []string{}
words = append(words, stemmerFilter(stopwordFilter(tokenize(s)))...)

View File

@@ -68,7 +68,24 @@ func TestWords(t *testing.T) {
words[6] != "dog" {
t.Error("incorrect words returned")
}
}
}
// TestStemmer feeds StringToStemmedSearchWords a string containing repeated
// words, an emoji and a URL, and expects the seven tokens listed in want, in
// order (the emoji is not expected in the output and "google" is expected as
// "googl").
func TestStemmer(t *testing.T) {
	s := `quick quick fox 😂 smile http://google.com`
	want := []string{"quick", "quick", "fox", "smile", "http", "googl", "com"}

	got := StringToStemmedSearchWords(s)
	t.Log(got)

	// Stop immediately on a length mismatch: the element-wise comparison
	// below would otherwise panic with an index-out-of-range error instead of
	// reporting an ordinary test failure.
	if len(got) != len(want) {
		t.Fatalf("wrong number of words: got %d %q, want %d %q", len(got), got, len(want), want)
	}

	for i := range want {
		if got[i] != want[i] {
			t.Errorf("bad word at index %d: got %q, want %q", i, got[i], want[i])
		}
	}
}