// Source: linkwallet/content/content.go (46 lines, 838 B, Go)
// Timestamp shown by the repository web view: 2022-05-24 18:03:31 +09:30

package content
import (
	"log"
	"strings"
	"time"

	"github.com/gocolly/colly"
	"github.com/tardisx/linkwallet/entity"
)
func FetchPageInfo(bm entity.Bookmark) entity.PageInfo {
info := entity.PageInfo{
Fetched: time.Now(),
}
url := bm.URL
c := colly.NewCollector()
c.SetRequestTimeout(5 * time.Second)
c.OnHTML("p,h1,h2,h3,h4,h5,h6,li", func(e *colly.HTMLElement) {
info.RawText = info.RawText + e.Text + "\n"
})
c.OnHTML("head>title", func(h *colly.HTMLElement) {
info.Title = h.Text
})
c.OnResponse(func(r *colly.Response) {
2022-06-01 15:13:51 +09:30
info.StatusCode = r.StatusCode
2022-05-24 18:03:31 +09:30
info.Size = len(r.Body)
})
c.OnRequest(func(r *colly.Request) {
// log.Println("Visiting", r.URL.String())
2022-05-24 18:03:31 +09:30
})
c.OnError(func(r *colly.Response, err error) {
log.Printf("error for %s: %s", r.Request.URL.String(), err)
})
c.Visit(url)
return info
}