mirror of
https://gitgud.io/yats/libkiwi.git
synced 2026-06-22 19:15:30 -04:00
Refactoring n shit
This commit is contained in:
+23
-97
@@ -1,7 +1,6 @@
|
||||
package libkiwi
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
@@ -14,6 +13,8 @@ import (
|
||||
gq "github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
// _USER_AGENT is a fixed Firefox 60 / Windows 7 browser identification string.
// NOTE(review): presumably sent as the User-Agent header on outgoing requests
// (GetPage sets an identical literal) — its actual use is not visible here.
const _USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0"
|
||||
|
||||
type KF struct {
|
||||
client http.Client
|
||||
domain *url.URL
|
||||
@@ -38,47 +39,9 @@ func NewKF(hc http.Client, host *url.URL) (*KF, error) {
|
||||
domain: u,
|
||||
}
|
||||
|
||||
// Update host url in case we get redirected across domains.
|
||||
hc.CheckRedirect = func(req *http.Request, via []*http.Request) error {
|
||||
reqHost := req.URL.Hostname()
|
||||
if reqHost != u.Hostname() {
|
||||
// Deliberately set to Hostname() and not Host.
|
||||
// This excludes any extra shit like ports.
|
||||
u.Host = reqHost
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
return kf, nil
|
||||
}
|
||||
|
||||
func (kf *KF) GetPage(ctx context.Context, u *url.URL) (*http.Response, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", u.String(), nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0")
|
||||
|
||||
resp, err := kf.client.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// KiwiFlare redirect is signaled by 203 status.
|
||||
if resp.StatusCode == 203 {
|
||||
err = kf.solveKiwiFlare(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Try fetching the page again now that we're authed.
|
||||
return kf.GetPage(ctx, u)
|
||||
}
|
||||
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
type User struct {
|
||||
ID uint32
|
||||
Name string
|
||||
@@ -87,94 +50,57 @@ type User struct {
|
||||
}
|
||||
|
||||
func parsePostAuthor(article *gq.Selection) (User, error) {
|
||||
user := User{}
|
||||
|
||||
userBlock := article.Find("section.message-user")
|
||||
|
||||
idStr, ok := userBlock.Attr("data-user-id")
|
||||
if !ok {
|
||||
// TODO: Proper error types.
|
||||
return User{}, errors.New("Failed to parse post author attr.")
|
||||
return user, errors.New("Failed to parse post author attr.")
|
||||
}
|
||||
|
||||
id, err := strconv.Atoi(idStr)
|
||||
if err != nil {
|
||||
return User{}, err
|
||||
return user, err
|
||||
}
|
||||
user.ID = uint32(id)
|
||||
|
||||
name, ok := article.Attr("data-author")
|
||||
if !ok {
|
||||
return User{}, errors.New("Failed to parse post author attr.")
|
||||
return user, errors.New("Failed to parse post author attr.")
|
||||
}
|
||||
user.Name = name
|
||||
|
||||
title := userBlock.Find(".message-userTitle").Text()
|
||||
user.Title = userBlock.Find(".message-userTitle").Text()
|
||||
|
||||
urlStr, ok := userBlock.Attr("itemid")
|
||||
if !ok {
|
||||
return User{}, errors.New("Failed to parse post author attr.")
|
||||
return user, errors.New("Failed to parse post author attr.")
|
||||
}
|
||||
|
||||
u, err := url.Parse(urlStr)
|
||||
if err != nil {
|
||||
return User{}, err
|
||||
}
|
||||
|
||||
user := User{
|
||||
ID: uint32(id),
|
||||
Name: name,
|
||||
Title: title,
|
||||
URL: u,
|
||||
return user, err
|
||||
}
|
||||
user.URL = u
|
||||
|
||||
return user, nil
|
||||
}
|
||||
|
||||
type Post struct {
|
||||
Author User
|
||||
Body io.Reader
|
||||
Text []byte
|
||||
|
||||
HTML []byte
|
||||
|
||||
article *gq.Selection
|
||||
body *gq.Selection
|
||||
}
|
||||
|
||||
func (kf *KF) GetPost(ctx context.Context, postID uint32) (Post, error) {
|
||||
// Example post goto link: https://kiwifarms.st/goto/post?id=22058462
|
||||
gtl := fmt.Sprintf("%s/goto/post?id=%d", kf.domain.String(), postID)
|
||||
u, err := url.Parse(gtl)
|
||||
func (post *Post) TextContent() (io.Reader, error) {
|
||||
postHTML, err := post.body.Html()
|
||||
if err != nil {
|
||||
return Post{}, err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
resp, err := kf.GetPage(ctx, u)
|
||||
if err != nil {
|
||||
return Post{}, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
doc, err := gq.NewDocumentFromReader(resp.Body)
|
||||
if err != nil {
|
||||
return Post{}, err
|
||||
}
|
||||
|
||||
// Selector: #js-post-22058462
|
||||
article := doc.Find(fmt.Sprintf("article#js-post-%d", postID))
|
||||
|
||||
body := article.Find("div.message-content article.message-body")
|
||||
if body.Length() == 0 {
|
||||
return Post{}, errors.New("Failed to parse post message body.")
|
||||
}
|
||||
|
||||
bh, err := body.Html()
|
||||
if err != nil {
|
||||
return Post{}, err
|
||||
}
|
||||
r := strings.NewReader(bh)
|
||||
|
||||
author, err := parsePostAuthor(article)
|
||||
if err != nil {
|
||||
return Post{}, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
post := Post{
|
||||
Author: author,
|
||||
Body: r,
|
||||
}
|
||||
|
||||
return post, nil
|
||||
return strings.NewReader(postHTML), nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user