Files
libkiwi/libkiwi.go
T
2026-06-05 17:36:21 -04:00

181 lines
3.4 KiB
Go

package libkiwi
import (
"context"
"errors"
"fmt"
"io"
"net/http"
"net/http/cookiejar"
"net/url"
"strconv"
"strings"
gq "github.com/PuerkitoBio/goquery"
)
// KF is a client bound to a single forum host. It holds the http.Client used
// for every request and the base URL that request URLs are built from.
type KF struct {
	client http.Client
	domain *url.URL
}

// NewKF builds a KF that talks to host over HTTPS.
//
// Supply your own http.Client to route through any proxies. The client is
// taken by value: its Jar is replaced with a fresh in-memory cookie jar and
// its CheckRedirect is wrapped so that the stored base URL follows redirects
// that land on a different hostname. Only host.Hostname() is used; any
// scheme, port or path on host is ignored.
//
// An error is returned when host is nil, has no hostname, or when the base
// URL or cookie jar cannot be created.
func NewKF(hc http.Client, host *url.URL) (*KF, error) {
	if host == nil {
		return nil, errors.New("libkiwi: nil host URL")
	}
	if host.Hostname() == "" {
		return nil, errors.New("libkiwi: host URL has no hostname")
	}
	u, err := url.Parse(fmt.Sprintf("https://%s", host.Hostname()))
	if err != nil {
		return nil, err
	}
	jar, err := cookiejar.New(nil)
	if err != nil {
		return nil, err
	}
	hc.Jar = jar

	// Update host url in case we get redirected across domains.
	//
	// The hook has to be installed before hc is copied into the KF below:
	// http.Client is a struct, so assigning to hc after the copy would never
	// reach the client that actually performs the requests.
	prev := hc.CheckRedirect
	hc.CheckRedirect = func(req *http.Request, via []*http.Request) error {
		// Apply the redirect policy first so a rejected redirect never
		// changes the stored domain.
		if prev != nil {
			if err := prev(req, via); err != nil {
				return err
			}
		} else if len(via) >= 10 {
			// Same limit net/http applies when CheckRedirect is nil.
			return errors.New("stopped after 10 redirects")
		}
		if reqHost := req.URL.Hostname(); reqHost != u.Hostname() {
			// Deliberately set to Hostname() and not Host.
			// This excludes any extra components such as ports.
			u.Host = reqHost
		}
		return nil
	}

	return &KF{
		client: hc,
		domain: u,
	}, nil
}
// GetPage issues a GET request for u using the KF's http.Client and returns
// the response. The caller owns the returned response and must close its
// Body.
//
// A 203 status is treated as a KiwiFlare challenge: the challenge response is
// closed, kf.solveKiwiFlare is called, and the request is sent again. The
// number of challenge rounds is capped so that a server which keeps answering
// 203 cannot make this call loop forever.
//
// Errors from building the request, performing it, or solving the challenge
// are returned as-is; a nil u and an exhausted challenge budget produce their
// own errors.
func (kf *KF) GetPage(ctx context.Context, u *url.URL) (*http.Response, error) {
	if u == nil {
		return nil, errors.New("libkiwi: nil page URL")
	}

	// Upper bound on how many times the challenge is solved for one page.
	const maxChallenges = 3

	for solved := 0; ; solved++ {
		req, err := http.NewRequestWithContext(ctx, "GET", u.String(), nil)
		if err != nil {
			return nil, err
		}
		req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0")

		resp, err := kf.client.Do(req)
		if err != nil {
			return nil, err
		}

		// KiwiFlare redirect is signaled by 203 status.
		if resp.StatusCode != http.StatusNonAuthoritativeInfo {
			return resp, nil
		}

		// The challenge page is not handed to the caller, so release it here
		// instead of leaking the connection. The Close error is ignored
		// because the body is being discarded anyway.
		resp.Body.Close()

		if solved == maxChallenges {
			return nil, fmt.Errorf("fetching %s: still challenged after %d KiwiFlare attempts", u, maxChallenges)
		}
		if err := kf.solveKiwiFlare(ctx); err != nil {
			return nil, err
		}
		// Loop around and try fetching the page again now that we're authed.
	}
}
// User describes the author of a post as filled in by parsePostAuthor.
type User struct {
// ID is the numeric value of the author block's data-user-id attribute.
ID uint32
// Name is the value of the post article's data-author attribute.
Name string
// Title is the text content of the .message-userTitle element.
Title string
// URL is parsed from the author block's itemid attribute.
URL *url.URL
}
// parsePostAuthor extracts the author of a post from its article element.
//
// It reads data-user-id and itemid from the nested section.message-user
// block, data-author from the article itself, and the text of
// .message-userTitle for the title.
//
// An error is returned when any of the three attributes is missing, when the
// user id is not a base-10 number that fits in 32 unsigned bits, or when the
// itemid value is not a parseable URL. On error the returned User is the zero
// value.
func parsePostAuthor(article *gq.Selection) (User, error) {
	userBlock := article.Find("section.message-user")

	idStr, ok := userBlock.Attr("data-user-id")
	if !ok {
		// TODO: Proper error types.
		return User{}, errors.New("Failed to parse post author attr.")
	}
	// Parse directly into 32 unsigned bits so that a negative or oversized
	// id is reported as an error instead of silently wrapping in a uint32
	// conversion.
	id, err := strconv.ParseUint(idStr, 10, 32)
	if err != nil {
		return User{}, fmt.Errorf("parsing post author id %q: %w", idStr, err)
	}

	name, ok := article.Attr("data-author")
	if !ok {
		return User{}, errors.New("Failed to parse post author attr.")
	}

	title := userBlock.Find(".message-userTitle").Text()

	urlStr, ok := userBlock.Attr("itemid")
	if !ok {
		return User{}, errors.New("Failed to parse post author attr.")
	}
	u, err := url.Parse(urlStr)
	if err != nil {
		return User{}, fmt.Errorf("parsing post author url: %w", err)
	}

	return User{
		ID:    uint32(id),
		Name:  name,
		Title: title,
		URL:   u,
	}, nil
}
// Post is a single forum post as returned by GetPost.
type Post struct {
// Author is the user parsed from the post's article element.
Author User
// Body reads the HTML that GetPost extracted from the post's message body.
Body io.Reader
}
// GetPost fetches the post with the given id and returns its author and body.
//
// It requests the "/goto/post?id=<postID>" link under kf.domain through
// GetPage, parses the response as HTML, and looks up the element
// article#js-post-<postID>. Post.Body reads the HTML of that element's
// message body; Post.Author comes from parsePostAuthor.
//
// An error is returned when the page cannot be fetched or parsed, when the
// message body element is not found, or when the author cannot be parsed. On
// error the returned Post is the zero value.
func (kf *KF) GetPost(ctx context.Context, postID uint32) (Post, error) {
	// Example post goto link: https://kiwifarms.st/goto/post?id=22058462
	gtl := fmt.Sprintf("%s/goto/post?id=%d", kf.domain.String(), postID)
	u, err := url.Parse(gtl)
	if err != nil {
		return Post{}, fmt.Errorf("building url for post %d: %w", postID, err)
	}

	resp, err := kf.GetPage(ctx, u)
	if err != nil {
		return Post{}, fmt.Errorf("fetching post %d: %w", postID, err)
	}
	// Registered exactly once; the body is fully consumed by the HTML parser
	// below, so nothing else needs it after this function returns.
	defer resp.Body.Close()

	doc, err := gq.NewDocumentFromReader(resp.Body)
	if err != nil {
		return Post{}, fmt.Errorf("parsing page for post %d: %w", postID, err)
	}

	// Selector: #js-post-22058462
	article := doc.Find(fmt.Sprintf("article#js-post-%d", postID))
	body := article.Find("div.message-content article.message-body")
	if body.Length() == 0 {
		return Post{}, errors.New("Failed to parse post message body.")
	}
	bh, err := body.Html()
	if err != nil {
		return Post{}, fmt.Errorf("rendering body of post %d: %w", postID, err)
	}

	author, err := parsePostAuthor(article)
	if err != nil {
		return Post{}, err
	}

	return Post{
		Author: author,
		Body:   strings.NewReader(bh),
	}, nil
}