mirror of
https://gitgud.io/yats/libkiwi.git
synced 2026-06-15 15:55:28 -04:00
108 lines
1.9 KiB
Go
108 lines
1.9 KiB
Go
package libkiwi
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"net/http/cookiejar"
|
|
"net/url"
|
|
"strconv"
|
|
"strings"
|
|
|
|
gq "github.com/PuerkitoBio/goquery"
|
|
)
|
|
|
|
// _USER_AGENT is a browser-style User-Agent string identifying as Firefox 60
// on Windows NT 6.1.
// NOTE(review): nothing in this file references it; presumably it is meant to
// be sent with outgoing requests — confirm against the rest of the package.
const _USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0"
|
|
|
|
// KF is a handle for talking to a single forum host. It pairs the HTTP client
// used for requests with the base URL derived from the host given to NewKF.
type KF struct {
	// client is the caller-supplied HTTP client with its cookie jar replaced
	// by a fresh one in NewKF.
	client http.Client
	// domain is the base URL of the form "https://<hostname>".
	domain *url.URL
}

// NewKF builds a KF for the site named by host.
//
// Supply your own http.Client to route through any proxies. The client is
// taken by value and its Jar field is replaced with a new, empty cookie jar,
// so the caller's copy is left untouched and any jar it carried is not used.
//
// Only the hostname of host is kept: scheme, port, path and query are dropped
// and the resulting base URL always uses the https scheme.
//
// An error is returned when host is nil, when host has no hostname (for
// example a URL parsed from a bare "example.com", which lands in Path rather
// than Host), or when the base URL or the cookie jar cannot be created.
func NewKF(hc http.Client, host *url.URL) (*KF, error) {
	// Guard against a nil pointer instead of panicking inside library code.
	if host == nil {
		return nil, errors.New("libkiwi: host URL is nil")
	}

	// An empty hostname would yield the meaningless base URL "https:".
	hostname := host.Hostname()
	if hostname == "" {
		return nil, fmt.Errorf("libkiwi: host URL %q has no hostname", host.String())
	}

	u, err := url.Parse(fmt.Sprintf("https://%s", hostname))
	if err != nil {
		return nil, err
	}

	jar, err := cookiejar.New(nil)
	if err != nil {
		return nil, err
	}

	// hc is a copy, so this does not affect the caller's client value.
	hc.Jar = jar

	kf := &KF{
		client: hc,
		domain: u,
	}

	return kf, nil
}
|
|
|
|
// User describes the author of a post as scraped from the page markup.
type User struct {
	// ID is the numeric user identifier.
	ID uint32
	// Name is the author's display name; Title is the text of the author's
	// user-title element.
	Name, Title string
	// URL is the parsed link associated with the author.
	URL *url.URL
}
|
|
|
|
func parsePostAuthor(article *gq.Selection) (User, error) {
|
|
user := User{}
|
|
|
|
userBlock := article.Find("section.message-user")
|
|
|
|
idStr, ok := userBlock.Attr("data-user-id")
|
|
if !ok {
|
|
// TODO: Proper error types.
|
|
return user, errors.New("Failed to parse post author attr.")
|
|
}
|
|
id, err := strconv.Atoi(idStr)
|
|
if err != nil {
|
|
return user, err
|
|
}
|
|
user.ID = uint32(id)
|
|
|
|
name, ok := article.Attr("data-author")
|
|
if !ok {
|
|
return user, errors.New("Failed to parse post author attr.")
|
|
}
|
|
user.Name = name
|
|
|
|
user.Title = userBlock.Find(".message-userTitle").Text()
|
|
|
|
urlStr, ok := userBlock.Attr("itemid")
|
|
if !ok {
|
|
return user, errors.New("Failed to parse post author attr.")
|
|
}
|
|
u, err := url.Parse(urlStr)
|
|
if err != nil {
|
|
return user, err
|
|
}
|
|
user.URL = u
|
|
|
|
return user, nil
|
|
}
|
|
|
|
type Post struct {
|
|
Author User
|
|
|
|
article *gq.Selection
|
|
body *gq.Selection
|
|
}
|
|
|
|
func (post *Post) TextContent() io.Reader {
|
|
return strings.NewReader(strings.TrimSpace(post.body.Text()))
|
|
}
|
|
|
|
func (post *Post) HTML() (io.Reader, error) {
|
|
postHTML, err := post.article.Html()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return strings.NewReader(strings.TrimSpace(postHTML)), nil
|
|
}
|