Initial checkin
This commit is contained in:
parent
7f7ba7d29e
commit
e1f630a80d
14
go.mod
Normal file
14
go.mod
Normal file
@ -0,0 +1,14 @@
|
||||
module github.com/tardisx/haiku-detector
|
||||
|
||||
go 1.24.1
|
||||
|
||||
require (
|
||||
github.com/mtso/syllables v0.1.0
|
||||
github.com/stretchr/testify v1.10.0
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
)
|
12
go.sum
Normal file
12
go.sum
Normal file
@ -0,0 +1,12 @@
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/mtso/syllables v0.1.0 h1:tY0Kef9J0XY6EyN7ee9fCIwXXqw3kZPBEWqTplYFutA=
|
||||
github.com/mtso/syllables v0.1.0/go.mod h1:b69tp5uXvnTheTZrfZUzLzn9uQMeUPXZO6b4zOvMC0M=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
|
||||
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
126
haiku.go
Normal file
126
haiku.go
Normal file
@ -0,0 +1,126 @@
|
||||
// Package haiku finds haiku within English sentences.
|
||||
package haiku
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
"unicode"
|
||||
|
||||
"github.com/mtso/syllables"
|
||||
)
|
||||
|
||||
// Haiku holds the lines of a haiku found in a sentence. The stored lines may
// carry leading or trailing whitespace; the accessors below strip it.
type Haiku struct {
	lines []string
}

// Lines returns the Haiku as multiple lines, each with surrounding whitespace
// removed. The returned slice is freshly allocated and never nil, so callers
// may modify it without affecting the Haiku.
func (h Haiku) Lines() []string {
	stripped := make([]string, 0, len(h.lines))
	for _, l := range h.lines {
		stripped = append(stripped, strings.TrimSpace(l))
	}
	return stripped
}

// String returns the Haiku as a single string, with newlines between
// each line.
func (h Haiku) String() string {
	// Reuse Lines so both accessors always agree on how lines are cleaned up.
	return strings.Join(h.Lines(), "\n")
}
|
||||
|
||||
// sentenceBoundary matches the end of a sentence: a word character, then
// either a single full stop or a run of '!' / '?' marks, then whitespace.
// Requiring the trailing whitespace keeps tokens such as "$1.50" intact, and
// allowing only a single full stop means an ellipsis ("wait... what") does
// not end a sentence. Compiled once at package scope rather than per call.
var sentenceBoundary = regexp.MustCompile(`\w(?:\.|[!?]+)\s+`)

// sentencesFromText splits text into sentences at each sentenceBoundary.
//
// Every returned sentence keeps its terminating punctuation and has its
// surrounding whitespace trimmed. Text after the last boundary (or the whole
// text, when it has no boundary) is returned as a final sentence. Sentences
// that are empty after trimming are dropped, so empty or whitespace-only
// input yields a nil slice.
func sentencesFromText(text string) []string {
	var sentences []string
	start := 0

	for _, loc := range sentenceBoundary.FindAllStringIndex(text, -1) {
		end := loc[1] // include the punctuation and the whitespace after it
		if s := strings.TrimSpace(text[start:end]); s != "" {
			sentences = append(sentences, s)
		}
		start = end
	}

	// Whatever follows the final boundary is a sentence without trailing
	// whitespace (typically the last sentence of the text).
	if tail := strings.TrimSpace(text[start:]); tail != "" {
		sentences = append(sentences, tail)
	}

	return sentences
}
|
||||
|
||||
// wordsInSentence breaks a sentence into lower-cased words.
//
// Every leading and trailing rune that is not a letter is trimmed from the
// sentence as a whole (punctuation inside the sentence is kept attached to
// its word). The remainder is split on runs of any whitespace, so repeated
// spaces, tabs and newlines never produce empty words. It returns nil when
// nothing is left after trimming.
func wordsInSentence(s string) []string {
	s = strings.ToLower(s) // aesthetic :-)
	s = strings.TrimFunc(s, func(r rune) bool {
		return !unicode.IsLetter(r)
	})
	if s == "" {
		return nil
	}
	return strings.Fields(s)
}
|
||||
|
||||
func haikuFromSentence(s string) (Haiku, error) {
|
||||
words := wordsInSentence(s)
|
||||
if len(words) == 0 {
|
||||
return Haiku{}, errors.New("sentence has 0 words")
|
||||
}
|
||||
line := 0
|
||||
counts := []int{5, 7, 5}
|
||||
wordIdx := 0
|
||||
// lines := make([]string, len(counts))
|
||||
haiku := Haiku{
|
||||
lines: make([]string, len(counts)),
|
||||
}
|
||||
for {
|
||||
if line == len(counts) && wordIdx == len(words) {
|
||||
// we finished the haiku, at the same time as we ran out of words!
|
||||
return haiku, nil
|
||||
} else if wordIdx == len(words) {
|
||||
// we ran out of words before we filled in the haiku
|
||||
return Haiku{}, fmt.Errorf("not a haiku - ran out of words at line: %d, counts: %#v, lines: %#v", line, counts, haiku.lines)
|
||||
} else if line == len(counts) {
|
||||
return Haiku{}, fmt.Errorf("not a haiku - too many words: %d, counts: %#v, lines: %#v", line, counts, haiku.lines)
|
||||
|
||||
}
|
||||
|
||||
thisWord := words[wordIdx]
|
||||
counts[line] -= syllables.In(thisWord)
|
||||
haiku.lines[line] += thisWord + " "
|
||||
if counts[line] == 0 {
|
||||
// we finished a line with the right number of syllables, move to next line
|
||||
line++
|
||||
wordIdx++
|
||||
continue
|
||||
} else if counts[line] < 0 {
|
||||
// blew past the syllable count
|
||||
break
|
||||
}
|
||||
wordIdx++
|
||||
}
|
||||
return Haiku{}, errors.New("not a haiku")
|
||||
}
|
||||
|
||||
// Find finds 0 or more haiku in an arbitrary string. The string may contain
|
||||
// one or more sentences, delimited by normal English punctuation. A haiku
|
||||
// will only be matched against a complete sentence.
|
||||
func Find(s string) []Haiku {
|
||||
h := []Haiku{}
|
||||
sentences := sentencesFromText(s)
|
||||
for _, sentence := range sentences {
|
||||
println(sentence)
|
||||
aHaiku, err := haikuFromSentence(sentence)
|
||||
if err == nil {
|
||||
h = append(h, aHaiku)
|
||||
}
|
||||
}
|
||||
return h
|
||||
}
|
134
haiku_test.go
Normal file
134
haiku_test.go
Normal file
@ -0,0 +1,134 @@
|
||||
package haiku
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestSentenceExtraction(t *testing.T) {
|
||||
text := `The giant brown dog! He jumped over the candlestick into a popsicle! What a disaster! But maybe a good disaster? He spent $1.50 on it. All the while he was chanting "U.S.A." like a madman.`
|
||||
s := sentencesFromText(text)
|
||||
if assert.Len(t, s, 6) {
|
||||
assert.Equal(t, "What a disaster!", s[2])
|
||||
assert.Equal(t, `All the while he was chanting "U.S.A." like a madman.`, s[5])
|
||||
}
|
||||
}
|
||||
|
||||
func TestSentenceExtractionMultiline(t *testing.T) {
|
||||
text := `Sentence one.
|
||||
Sentence two, on a new line.
|
||||
|
||||
Sentence three, after a blank line!`
|
||||
s := sentencesFromText(text)
|
||||
assert.Len(t, s, 3)
|
||||
assert.Equal(t, "Sentence two, on a new line.", s[1])
|
||||
assert.Equal(t, `Sentence three, after a blank line!`, s[2])
|
||||
}
|
||||
|
||||
func TestSentenceNoPunctuation(t *testing.T) {
|
||||
text := `this is just some words`
|
||||
s := sentencesFromText(text)
|
||||
assert.Len(t, s, 1)
|
||||
assert.Equal(t, "this is just some words", s[0])
|
||||
}
|
||||
|
||||
func TestSentenceRepeatedPunctuation(t *testing.T) {
|
||||
text := `this is just some words!! With a lot of emphasis!`
|
||||
s := sentencesFromText(text)
|
||||
t.Skip("this case does not yet work?")
|
||||
if assert.Len(t, s, 2) {
|
||||
assert.Equal(t, "this is just some words!!", s[0])
|
||||
assert.Equal(t, "With a lot of emphasis!", s[1])
|
||||
}
|
||||
}
|
||||
|
||||
func TestWordsInSentence(t *testing.T) {
|
||||
text := `the quick brown dog`
|
||||
words := wordsInSentence(text)
|
||||
assert.Len(t, words, 4)
|
||||
|
||||
text = `the quick brown dog.`
|
||||
words = wordsInSentence(text)
|
||||
assert.Len(t, words, 4)
|
||||
|
||||
text = `it cost $4.50, or more`
|
||||
words = wordsInSentence(text)
|
||||
assert.Len(t, words, 5)
|
||||
|
||||
text = "the quick gay dude ran to the new pink car in the state of mind to win"
|
||||
words = wordsInSentence(text)
|
||||
assert.Len(t, words, 17)
|
||||
assert.Equal(t, "the", words[0])
|
||||
assert.Equal(t, "quick", words[1])
|
||||
assert.Equal(t, "to", words[15])
|
||||
assert.Equal(t, "win", words[16])
|
||||
|
||||
text = "the quick gay dude ran to the new pink car in the state of mind to win!"
|
||||
words = wordsInSentence(text)
|
||||
assert.Len(t, words, 17)
|
||||
assert.Equal(t, "the", words[0])
|
||||
assert.Equal(t, "quick", words[1])
|
||||
assert.Equal(t, "to", words[15])
|
||||
assert.Equal(t, "win", words[16])
|
||||
|
||||
text = ""
|
||||
words = wordsInSentence(text)
|
||||
assert.Len(t, words, 0)
|
||||
}
|
||||
|
||||
func TestHaikuFromSentence(t *testing.T) {
|
||||
// 1 2 3 4 5| 6 7 8 9 10 11 12| 13 14 15 16 17
|
||||
h, err := haikuFromSentence("the quick fast dude ran to the new red car in the state of mind to win")
|
||||
if assert.NoError(t, err) {
|
||||
assert.Equal(t, `the quick fast dude ran
|
||||
to the new red car in the
|
||||
state of mind to win`,
|
||||
h.String())
|
||||
assert.Equal(t, "to the new red car in the", h.Lines()[1])
|
||||
}
|
||||
|
||||
_, err = haikuFromSentence("did u talk also about the breakup part? like why it happened and if that reason is still a thing?")
|
||||
assert.ErrorContains(t, err, "not a haiku - too many words")
|
||||
|
||||
h, err = haikuFromSentence("by grabthar's hammer, what a savings you can make - almost criminal")
|
||||
if assert.NoError(t, err) {
|
||||
assert.Equal(t, `by grabthar's hammer,
|
||||
what a savings you can make
|
||||
- almost criminal`,
|
||||
h.String())
|
||||
}
|
||||
|
||||
h, err = haikuFromSentence("")
|
||||
assert.ErrorContains(t, err, "sentence has 0 words")
|
||||
|
||||
}
|
||||
|
||||
func TestParagraphsToHaikus(t *testing.T) {
|
||||
text := `Threads are heavyweights, goroutines are light as air, concurrency wins.
|
||||
|
||||
Strict and silent judge, catches errors in my code, no nil slips away. One thread sends a word, another waits in silence, they meet, work is done. Shape of thought defined, no need for inheritance, duck typing prevails.
|
||||
|
||||
Memory held tight, swept away in quiet dusk, Go frees what I leave.`
|
||||
out := Find(text)
|
||||
|
||||
if assert.Len(t, out, 5) {
|
||||
assert.Equal(t, `one thread sends a word,
|
||||
another waits in silence,
|
||||
they meet, work is done`, out[2].String())
|
||||
}
|
||||
}
|
||||
|
||||
func FuzzHaikuFromSentence(f *testing.F) {
|
||||
f.Add("")
|
||||
f.Add("this is some boring text")
|
||||
f.Add(" this is some boring text ")
|
||||
f.Add("this is some boring text (@()*@(@)* ijOF(@@( lots of ))punctuation")
|
||||
f.Add(" ")
|
||||
f.Add("12092897234987")
|
||||
f.Add("\U000c9288\U000fa405䠁\U000dbcd3\U001095be\U0003456e\U0007fe36\U0008d5d3𧝳\U0006a3e0䠹𗙧\U0003ec48\U00078be3\U0007dd24\U000b4b50뗏\U0005a099\U0003eb1e\U000ffef6\U000c61c0𠬢\U000d02a3\U000a41dd\U000c62ba\U000890c0\U000e534e\U000d8155\U00072e3a\U0010c563\U000a7730\U00048c0a\U000a2698\U000dd595\U000d0768𝁔\U00038a30컩\U00032563𠷰🭣\U000564a5𝒈\U0004104c\U0006bdc8\U000d72d3\U000ce78b\U00065795\U00102f31\U000723eb\U000dc97b\U0006a4b7亮\U0010c8f7\U000e8090\U0008abf9𘬒푆\U00012716𭝱\U00016244\U0006012e\U000689d3\U0009da4d𭼴\U0009a067\U000aede5\U000b9d6b\U0004b136\U000975c4㷭\U0003f873\U00081080\U00047c35\U000b8ee5\U000d4493\U000dccb4\U000313f2\U00084081\U00084dc6\U000e8fc8\U000a362b\U00101023\U000855d6\U000e4318\U000cebf6\U00051bc5\U000d9bff\U00092349\U000ae3da\U000a116f\U0003f89e\U000d5c54\U000b34ae\U0006d4a0\U0003cd52\U00083f6c\U0003a7fc\U0005083b\U00086439")
|
||||
f.Add("춈洞䠁킓㑮翦贓𧝳檠䠹𗙧㻈磣絤骐뗏娙㺞챀𠬢킣ꐝ챺茀킕瀺ꜰ䠊ꉘ킕큨𝁔㢰컩㉣𠷰🭣啥𝒈䄌殈틓챋敕𐋱爫킻橷亮谹𘬒푆ሖ𭝱㡄愮桓鶍𭼴騧껥맥䬶䬆靄㷭㾳耀䰵뷥킓챴㈲蓁虆ꍫ𐄣蓖챶傅ힿ鈉긚ꄯ㾞픔ꍮ洠㳒葛㩼偏")
|
||||
f.Fuzz(func(t *testing.T, in string) {
|
||||
haikuFromSentence(in)
|
||||
})
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user