// Package libkiwi is a small HTTP client and HTML scraper for a XenForo-style
// forum (the example URLs in this file point at kiwifarms.st).
package libkiwi

import (
	"context"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/http/cookiejar"
	"net/url"
	"strconv"
	"strings"

	gq "github.com/PuerkitoBio/goquery"
)

// KF is a client bound to a single forum host.
//
// client is the HTTP client used for every request; domain is the base URL
// ("https://<hostname>") that request URLs are built from. domain is updated
// in place when a redirect lands on a different hostname.
type KF struct {
	client http.Client
	domain *url.URL
}

// NewKF builds a KF for the hostname of host.
//
// Supply your own http.Client to route through any proxies. The client is
// taken by value; its Jar is replaced with a fresh cookie jar and its
// CheckRedirect is wrapped so that cross-host redirects update the stored
// base URL. A CheckRedirect policy already set on hc is still consulted
// first; when none is set, redirects stop after 10 hops (the net/http
// default, which would otherwise be lost by installing a custom hook).
//
// It returns an error when host is nil, when the derived base URL cannot be
// parsed, or when the cookie jar cannot be created.
func NewKF(hc http.Client, host *url.URL) (*KF, error) {
	// Local so it cannot collide with identifiers in other files of the package.
	const maxRedirects = 10

	if host == nil {
		return nil, errors.New("libkiwi: host url is nil")
	}

	// Only the hostname is kept: scheme is forced to https and any port,
	// path or query on host is dropped.
	u, err := url.Parse(fmt.Sprintf("https://%s", host.Hostname()))
	if err != nil {
		return nil, err
	}

	jar, err := cookiejar.New(nil)
	if err != nil {
		return nil, err
	}
	hc.Jar = jar

	// Update host url in case we get redirected across domains.
	//
	// This must be installed BEFORE hc is copied into the KF below: hc is a
	// value, so assigning the hook after the copy would leave the KF's own
	// client without it.
	prev := hc.CheckRedirect
	hc.CheckRedirect = func(req *http.Request, via []*http.Request) error {
		if prev != nil {
			if err := prev(req, via); err != nil {
				return err
			}
		} else if len(via) >= maxRedirects {
			return fmt.Errorf("stopped after %d redirects", maxRedirects)
		}

		reqHost := req.URL.Hostname()
		if reqHost != u.Hostname() {
			// Deliberately set to Hostname() and not Host, so that extras
			// such as an explicit port are not carried over.
			u.Host = reqHost
		}
		return nil
	}

	kf := &KF{
		client: hc,
		domain: u,
	}
	return kf, nil
}

// GetPage performs a GET for u and returns the response.
//
// A 203 status is treated as a KiwiFlare challenge: the challenge response is
// closed, kf.solveKiwiFlare is invoked, and the request is retried. The number
// of challenge rounds is bounded so a host that keeps answering 203 cannot
// cause an endless retry loop.
//
// On success the caller owns the returned response and must close its Body.
// Errors from building the request, from the HTTP round trip, and from
// solveKiwiFlare are returned unchanged.
func (kf *KF) GetPage(ctx context.Context, u *url.URL) (*http.Response, error) {
	// Upper bound on how many times the challenge is solved for one call.
	const maxChallenges = 3

	for solved := 0; ; solved++ {
		req, err := http.NewRequestWithContext(ctx, "GET", u.String(), nil)
		if err != nil {
			return nil, err
		}
		req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0")

		resp, err := kf.client.Do(req)
		if err != nil {
			return nil, err
		}

		// KiwiFlare redirect is signaled by 203 status.
		if resp.StatusCode != http.StatusNonAuthoritativeInfo {
			return resp, nil
		}

		// The challenge page itself is not handed to the caller, so release
		// it here. The Close error is ignored: the body is being discarded
		// and there is nothing useful to do with a failure.
		_ = resp.Body.Close()

		if solved >= maxChallenges {
			return nil, fmt.Errorf("libkiwi: still challenged after %d KiwiFlare attempts", maxChallenges)
		}

		if err := kf.solveKiwiFlare(ctx); err != nil {
			return nil, err
		}
		// Loop: try fetching the page again now that we're authed.
	}
}

// User describes the author of a post as scraped from the post markup.
type User struct {
	ID    uint32   // value of the data-user-id attribute
	Name  string   // value of the post's data-author attribute
	Title string   // text of the .message-userTitle element (may be empty)
	URL   *url.URL // parsed value of the itemid attribute
}

// parsePostAuthor extracts the author of a post from its <article> selection.
//
// It reads data-user-id and itemid from the "section.message-user" element,
// data-author from the article itself, and the user title text from
// ".message-userTitle". An error is returned when a required attribute is
// missing, when the user id is not a base-10 number that fits in a uint32,
// or when the itemid value is not a parseable URL.
func parsePostAuthor(article *gq.Selection) (User, error) {
	userBlock := article.Find("section.message-user")

	idStr, ok := userBlock.Attr("data-user-id")
	if !ok {
		// TODO: Proper error types.
		return User{}, errors.New("Failed to parse post author attr.")
	}
	// ParseUint with bitSize 32 rejects negative and out-of-range ids instead
	// of letting them wrap silently in a uint32 conversion.
	id, err := strconv.ParseUint(idStr, 10, 32)
	if err != nil {
		return User{}, err
	}

	name, ok := article.Attr("data-author")
	if !ok {
		return User{}, errors.New("Failed to parse post author attr.")
	}

	title := userBlock.Find(".message-userTitle").Text()

	urlStr, ok := userBlock.Attr("itemid")
	if !ok {
		return User{}, errors.New("Failed to parse post author attr.")
	}
	u, err := url.Parse(urlStr)
	if err != nil {
		return User{}, err
	}

	user := User{
		ID:    uint32(id),
		Name:  name,
		Title: title,
		URL:   u,
	}
	return user, nil
}

// Post is a single forum post: its author and the inner HTML of its message
// body, exposed as a reader.
type Post struct {
	Author User
	Body   io.Reader
}

// GetPost fetches the page containing the post with the given id and parses
// that post out of it.
//
// The page is requested through the "goto" link on the client's current base
// URL; the post is then located by its "article#js-post-<id>" element. An
// error is returned when the request fails, the document cannot be parsed,
// the message body element is not found, or the author cannot be parsed.
func (kf *KF) GetPost(ctx context.Context, postID uint32) (Post, error) {
	// Example post goto link: https://kiwifarms.st/goto/post?id=22058462
	gtl := fmt.Sprintf("%s/goto/post?id=%d", kf.domain.String(), postID)
	u, err := url.Parse(gtl)
	if err != nil {
		return Post{}, err
	}

	resp, err := kf.GetPage(ctx, u)
	if err != nil {
		return Post{}, err
	}
	// Registered exactly once; the body is fully consumed by the parser below.
	defer resp.Body.Close()

	doc, err := gq.NewDocumentFromReader(resp.Body)
	if err != nil {
		return Post{}, err
	}

	// Selector: #js-post-22058462
	article := doc.Find(fmt.Sprintf("article#js-post-%d", postID))

	// If the article itself is missing, this selection is empty as well, so
	// the single length check covers both cases.
	body := article.Find("div.message-content article.message-body")
	if body.Length() == 0 {
		return Post{}, errors.New("Failed to parse post message body.")
	}
	bh, err := body.Html()
	if err != nil {
		return Post{}, err
	}

	author, err := parsePostAuthor(article)
	if err != nil {
		return Post{}, err
	}

	post := Post{
		Author: author,
		Body:   strings.NewReader(bh),
	}
	return post, nil
}