2022-05-24 18:03:31 +09:30
|
|
|
package content
|
|
|
|
|
|
|
|
import (
|
|
|
|
"log"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/tardisx/linkwallet/entity"
|
|
|
|
|
|
|
|
"github.com/gocolly/colly"
|
|
|
|
)
|
|
|
|
|
|
|
|
func FetchPageInfo(bm entity.Bookmark) entity.PageInfo {
|
|
|
|
info := entity.PageInfo{
|
|
|
|
Fetched: time.Now(),
|
|
|
|
}
|
|
|
|
|
|
|
|
url := bm.URL
|
|
|
|
|
|
|
|
c := colly.NewCollector()
|
|
|
|
c.SetRequestTimeout(5 * time.Second)
|
|
|
|
|
|
|
|
c.OnHTML("p,h1,h2,h3,h4,h5,h6,li", func(e *colly.HTMLElement) {
|
|
|
|
info.RawText = info.RawText + e.Text + "\n"
|
|
|
|
})
|
|
|
|
|
|
|
|
c.OnHTML("head>title", func(h *colly.HTMLElement) {
|
|
|
|
info.Title = h.Text
|
|
|
|
})
|
|
|
|
|
|
|
|
c.OnResponse(func(r *colly.Response) {
|
2022-06-01 15:13:51 +09:30
|
|
|
info.StatusCode = r.StatusCode
|
2022-05-24 18:03:31 +09:30
|
|
|
info.Size = len(r.Body)
|
|
|
|
})
|
|
|
|
|
|
|
|
c.OnRequest(func(r *colly.Request) {
|
2022-06-01 21:51:57 +09:30
|
|
|
// log.Println("Visiting", r.URL.String())
|
2022-05-24 18:03:31 +09:30
|
|
|
})
|
|
|
|
|
|
|
|
c.OnError(func(r *colly.Response, err error) {
|
|
|
|
log.Printf("error for %s: %s", r.Request.URL.String(), err)
|
|
|
|
})
|
|
|
|
|
|
|
|
c.Visit(url)
|
|
|
|
return info
|
|
|
|
}
|