// Package haiku finds haiku within English sentences.
package haiku
|
||
|
|
||
|
import (
|
||
|
"errors"
|
||
|
"fmt"
|
||
|
"regexp"
|
||
|
"strings"
|
||
|
"unicode"
|
||
|
|
||
|
"github.com/mtso/syllables"
|
||
|
)
|
||
|
|
||
|
// Haiku is a haiku extracted from a sentence. Use Lines or String to read
// its text.
type Haiku struct {
	// lines holds the text of each line in order. Entries are stored as they
	// were assembled and may carry trailing whitespace; Lines and String trim
	// it before returning.
	lines []string
}
|
||
|
|
||
|
// Lines returns the Haiku as multiple lines.
|
||
|
func (h Haiku) Lines() []string {
|
||
|
stripped := []string{}
|
||
|
for _, l := range h.lines {
|
||
|
stripped = append(stripped, strings.TrimSpace(l))
|
||
|
}
|
||
|
return stripped
|
||
|
}
|
||
|
|
||
|
// String returns the Haiku as a single string, with newlines between
|
||
|
// each line.
|
||
|
func (h Haiku) String() string {
|
||
|
stripped := []string{}
|
||
|
for _, l := range h.lines {
|
||
|
stripped = append(stripped, strings.TrimSpace(l))
|
||
|
}
|
||
|
return strings.Join(stripped, "\n")
|
||
|
}
|
||
|
|
||
|
// sentenceBoundary matches the end of a sentence: a word character, one or
// more terminators ('.', '!' or '?'), and the whitespace that follows. It is
// compiled once at package scope rather than on every call.
var sentenceBoundary = regexp.MustCompile(`\w[.!?]+\s+`)

// sentencesFromText splits text into sentences.
//
// A sentence ends at a word character followed by a run of '.', '!' or '?'
// and then whitespace, so "Wait... what?! Yes." yields "Wait...", "what?!"
// and "Yes.". Terminating punctuation stays attached to its sentence and
// surrounding whitespace is trimmed. Any text after the last boundary is
// returned as a final sentence. Blank input yields no sentences.
func sentencesFromText(text string) []string {
	var sentences []string
	start := 0
	for _, loc := range sentenceBoundary.FindAllStringIndex(text, -1) {
		// loc[1] is just past the whitespace after the punctuation, so the
		// slice keeps the punctuation and TrimSpace drops the whitespace.
		end := loc[1]
		sentences = append(sentences, strings.TrimSpace(text[start:end]))
		start = end
	}

	// Whatever follows the last boundary (or the whole text when there was
	// none) is the final sentence; skip it if nothing but whitespace is left.
	if rest := strings.TrimSpace(text[start:]); rest != "" {
		sentences = append(sentences, rest)
	}
	return sentences
}
|
||
|
|
||
|
// wordsInSentence lower-cases s, strips every leading and trailing rune that
// is not a letter, and splits what remains into words on runs of whitespace
// (spaces, tabs, newlines). Punctuation inside the sentence stays attached to
// its word. It returns nil when s contains no letters.
func wordsInSentence(s string) []string {
	s = strings.ToLower(s) // aesthetic :-)
	s = strings.TrimFunc(s, func(r rune) bool {
		return !unicode.IsLetter(r)
	})
	if s == "" {
		return nil
	}
	// Fields, unlike Split on a single space, never produces empty words for
	// repeated spaces and also breaks on tabs and newlines.
	return strings.Fields(s)
}
|
||
|
|
||
|
func haikuFromSentence(s string) (Haiku, error) {
|
||
|
words := wordsInSentence(s)
|
||
|
if len(words) == 0 {
|
||
|
return Haiku{}, errors.New("sentence has 0 words")
|
||
|
}
|
||
|
line := 0
|
||
|
counts := []int{5, 7, 5}
|
||
|
wordIdx := 0
|
||
|
// lines := make([]string, len(counts))
|
||
|
haiku := Haiku{
|
||
|
lines: make([]string, len(counts)),
|
||
|
}
|
||
|
for {
|
||
|
if line == len(counts) && wordIdx == len(words) {
|
||
|
// we finished the haiku, at the same time as we ran out of words!
|
||
|
return haiku, nil
|
||
|
} else if wordIdx == len(words) {
|
||
|
// we ran out of words before we filled in the haiku
|
||
|
return Haiku{}, fmt.Errorf("not a haiku - ran out of words at line: %d, counts: %#v, lines: %#v", line, counts, haiku.lines)
|
||
|
} else if line == len(counts) {
|
||
|
return Haiku{}, fmt.Errorf("not a haiku - too many words: %d, counts: %#v, lines: %#v", line, counts, haiku.lines)
|
||
|
|
||
|
}
|
||
|
|
||
|
thisWord := words[wordIdx]
|
||
|
counts[line] -= syllables.In(thisWord)
|
||
|
haiku.lines[line] += thisWord + " "
|
||
|
if counts[line] == 0 {
|
||
|
// we finished a line with the right number of syllables, move to next line
|
||
|
line++
|
||
|
wordIdx++
|
||
|
continue
|
||
|
} else if counts[line] < 0 {
|
||
|
// blew past the syllable count
|
||
|
break
|
||
|
}
|
||
|
wordIdx++
|
||
|
}
|
||
|
return Haiku{}, errors.New("not a haiku")
|
||
|
}
|
||
|
|
||
|
// Find finds 0 or more haiku in an arbitrary string. The string may contain
|
||
|
// one or more sentences, delimited by normal English punctuation. A haiku
|
||
|
// will only be matched against a complete sentence.
|
||
|
func Find(s string) []Haiku {
|
||
|
h := []Haiku{}
|
||
|
sentences := sentencesFromText(s)
|
||
|
for _, sentence := range sentences {
|
||
|
println(sentence)
|
||
|
aHaiku, err := haikuFromSentence(sentence)
|
||
|
if err == nil {
|
||
|
h = append(h, aHaiku)
|
||
|
}
|
||
|
}
|
||
|
return h
|
||
|
}
|