// Mirror of https://gitgud.io/yats/libkiwi.git
// (synced 2026-06-20 01:55:23 -04:00; 181 lines, 3.4 KiB, Go).

package libkiwi
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"net/http/cookiejar"
|
|
"net/url"
|
|
"strconv"
|
|
"strings"
|
|
|
|
gq "github.com/PuerkitoBio/goquery"
|
|
)
|
|
|
|
// KF bundles the state shared by every request made to one site: the HTTP
// client that performs the requests and the base URL they are built from.
// Construct it with NewKF.
type KF struct {
	// client performs all HTTP requests. NewKF stores the caller's client
	// here by value after attaching a cookie jar to it.
	client http.Client
	// domain is the https base URL (scheme and hostname only) that request
	// URLs are derived from.
	domain *url.URL
}
|
|
|
|
// Supply your own http.Client to route through any proxies.
|
|
func NewKF(hc http.Client, host *url.URL) (*KF, error) {
|
|
u, err := url.Parse(fmt.Sprintf("https://%s", host.Hostname()))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
jar, err := cookiejar.New(nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
hc.Jar = jar
|
|
|
|
kf := &KF{
|
|
client: hc,
|
|
domain: u,
|
|
}
|
|
|
|
// Update host url in case we get redirected across domains.
|
|
hc.CheckRedirect = func(req *http.Request, via []*http.Request) error {
|
|
reqHost := req.URL.Hostname()
|
|
if reqHost != u.Hostname() {
|
|
// Deliberately set to Hostname() and not Host.
|
|
// This excludes any extra shit like ports.
|
|
u.Host = reqHost
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
return kf, nil
|
|
}
|
|
|
|
func (kf *KF) GetPage(ctx context.Context, u *url.URL) (*http.Response, error) {
|
|
req, err := http.NewRequestWithContext(ctx, "GET", u.String(), nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0")
|
|
|
|
resp, err := kf.client.Do(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// KiwiFlare redirect is signaled by 203 status.
|
|
if resp.StatusCode == 203 {
|
|
err = kf.solveKiwiFlare(ctx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Try fetching the page again now that we're authed.
|
|
return kf.GetPage(ctx, u)
|
|
}
|
|
|
|
return resp, nil
|
|
}
|
|
|
|
// User describes the author of a post as read from the post's markup.
type User struct {
	// ID is the numeric user id taken from the data-user-id attribute.
	ID uint32
	// Name is the author name taken from the data-author attribute.
	Name string
	// Title is the text of the .message-userTitle element.
	Title string
	// URL is parsed from the itemid attribute of the user block
	// (presumably the user's profile link; confirm against the markup).
	URL *url.URL
}
|
|
|
|
func parsePostAuthor(article *gq.Selection) (User, error) {
|
|
userBlock := article.Find("section.message-user")
|
|
|
|
idStr, ok := userBlock.Attr("data-user-id")
|
|
if !ok {
|
|
// TODO: Proper error types.
|
|
return User{}, errors.New("Failed to parse post author attr.")
|
|
}
|
|
|
|
id, err := strconv.Atoi(idStr)
|
|
if err != nil {
|
|
return User{}, err
|
|
}
|
|
|
|
name, ok := article.Attr("data-author")
|
|
if !ok {
|
|
return User{}, errors.New("Failed to parse post author attr.")
|
|
}
|
|
|
|
title := userBlock.Find(".message-userTitle").Text()
|
|
|
|
urlStr, ok := userBlock.Attr("itemid")
|
|
if !ok {
|
|
return User{}, errors.New("Failed to parse post author attr.")
|
|
}
|
|
|
|
u, err := url.Parse(urlStr)
|
|
if err != nil {
|
|
return User{}, err
|
|
}
|
|
|
|
user := User{
|
|
ID: uint32(id),
|
|
Name: name,
|
|
Title: title,
|
|
URL: u,
|
|
}
|
|
|
|
return user, nil
|
|
}
|
|
|
|
type Post struct {
|
|
Author User
|
|
Body io.Reader
|
|
}
|
|
|
|
func (kf *KF) GetPost(ctx context.Context, postID uint32) (Post, error) {
|
|
// Example post goto link: https://kiwifarms.st/goto/post?id=22058462
|
|
gtl := fmt.Sprintf("%s/goto/post?id=%d", kf.domain.String(), postID)
|
|
u, err := url.Parse(gtl)
|
|
if err != nil {
|
|
return Post{}, err
|
|
}
|
|
|
|
resp, err := kf.GetPage(ctx, u)
|
|
if err != nil {
|
|
return Post{}, err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
doc, err := gq.NewDocumentFromReader(resp.Body)
|
|
if err != nil {
|
|
return Post{}, err
|
|
}
|
|
|
|
// Selector: #js-post-22058462
|
|
article := doc.Find(fmt.Sprintf("article#js-post-%d", postID))
|
|
|
|
body := article.Find("div.message-content article.message-body")
|
|
if body.Length() == 0 {
|
|
return Post{}, errors.New("Failed to parse post message body.")
|
|
}
|
|
|
|
bh, err := body.Html()
|
|
if err != nil {
|
|
return Post{}, err
|
|
}
|
|
r := strings.NewReader(bh)
|
|
|
|
author, err := parsePostAuthor(article)
|
|
if err != nil {
|
|
return Post{}, err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
post := Post{
|
|
Author: author,
|
|
Body: r,
|
|
}
|
|
|
|
return post, nil
|
|
}
|