haiku-detector/haiku.go

127 lines
3.0 KiB
Go
Raw Permalink Normal View History

2025-03-27 18:58:03 +10:30
// Package haiku finds haiku within English sentences.
package haiku
import (
"errors"
"fmt"
"regexp"
"strings"
"unicode"
"github.com/mtso/syllables"
)
// Haiku holds the lines of a haiku detected in a sentence. The lines are
// kept as they were assembled and may carry surrounding whitespace; the
// accessor methods trim it before returning.
type Haiku struct {
	lines []string
}

// Lines returns the Haiku as multiple lines, each with leading and trailing
// whitespace removed. The result is always a fresh, non-nil slice.
func (h Haiku) Lines() []string {
	trimmed := make([]string, 0, len(h.lines))
	for i := range h.lines {
		trimmed = append(trimmed, strings.TrimSpace(h.lines[i]))
	}
	return trimmed
}

// String returns the Haiku as a single string, with newlines between
// each (trimmed) line.
func (h Haiku) String() string {
	return strings.Join(h.Lines(), "\n")
}
// sentenceBoundary matches the end of a sentence: a word character, a run of
// one or more terminators (".", "!" or "?"), and the whitespace separating it
// from whatever follows. It is compiled once rather than on every call.
var sentenceBoundary = regexp.MustCompile(`\w[.!?]+\s+`)

// sentencesFromText splits text into sentences.
//
// A sentence ends at a word character followed by one or more of ".", "!" or
// "?" and then whitespace, so "Wait... what?! Really." yields three
// sentences. The terminating punctuation is kept with its sentence and
// surrounding whitespace is trimmed. Any text after the last boundary is
// returned as a final sentence unless it is blank. Empty or whitespace-only
// input yields no sentences.
func sentencesFromText(text string) []string {
	var sentences []string
	start := 0
	for _, loc := range sentenceBoundary.FindAllStringIndex(text, -1) {
		// loc[1] is the end of the match, i.e. just past the whitespace
		// that follows the punctuation.
		end := loc[1]
		sentences = append(sentences, strings.TrimSpace(text[start:end]))
		start = end
	}
	// Whatever remains had no terminator followed by whitespace.
	if tail := strings.TrimSpace(text[start:]); tail != "" {
		sentences = append(sentences, tail)
	}
	return sentences
}
// wordsInSentence lowercases s, strips any leading and trailing characters
// that are not letters, and splits what remains into words on runs of
// whitespace (spaces, tabs, newlines). Punctuation inside the sentence stays
// attached to its word. It returns nil when s contains no letters.
func wordsInSentence(s string) []string {
	s = strings.ToLower(s) // aesthetic :-)
	s = strings.TrimFunc(s, func(r rune) bool {
		return !unicode.IsLetter(r)
	})
	if s == "" {
		return nil
	}
	// Fields, unlike Split on a single space, never yields empty words and
	// treats line breaks inside a sentence as word separators.
	return strings.Fields(s)
}
// haikuFromSentence tries to lay the words of s out as a 5-7-5 haiku.
//
// Words are consumed in order and appended to the current line while that
// line's remaining syllable budget is counted down. A line is complete when
// its budget reaches exactly zero. The sentence is a haiku only if the third
// line completes on the very last word. Otherwise an error is returned:
// the sentence has no words, the words ran out early, words were left over,
// or a word overshot a line's budget.
func haikuFromSentence(s string) (Haiku, error) {
	words := wordsInSentence(s)
	if len(words) == 0 {
		return Haiku{}, errors.New("sentence has 0 words")
	}

	// remaining[i] is the number of syllables still needed on line i.
	remaining := []int{5, 7, 5}
	result := Haiku{lines: make([]string, len(remaining))}
	row := 0

	for next := 0; ; next++ {
		outOfWords := next == len(words)
		outOfRows := row == len(remaining)

		switch {
		case outOfWords && outOfRows:
			// The last line was completed by the last word.
			return result, nil
		case outOfWords:
			// Words ran out before every line was filled.
			return Haiku{}, fmt.Errorf("not a haiku - ran out of words at line: %d, counts: %#v, lines: %#v", row, remaining, result.lines)
		case outOfRows:
			// Every line is full but words are left over.
			return Haiku{}, fmt.Errorf("not a haiku - too many words: %d, counts: %#v, lines: %#v", row, remaining, result.lines)
		}

		word := words[next]
		remaining[row] -= syllables.In(word)
		result.lines[row] += word + " "

		if remaining[row] < 0 {
			// This word pushed the line past its syllable budget.
			return Haiku{}, errors.New("not a haiku")
		}
		if remaining[row] == 0 {
			// Exactly the right number of syllables: start the next line.
			row++
		}
	}
}
// Find finds 0 or more haiku in an arbitrary string. The string may contain
// one or more sentences, delimited by normal English punctuation. A haiku
// will only be matched against a complete sentence.
//
// The returned slice is never nil; it is empty when no sentence is a haiku.
// Find produces no output of its own.
func Find(s string) []Haiku {
	found := []Haiku{}
	for _, sentence := range sentencesFromText(s) {
		// An error only means this sentence is not a haiku, so it is
		// skipped rather than reported.
		if h, err := haikuFromSentence(sentence); err == nil {
			found = append(found, h)
		}
	}
	return found
}