From 46634a9ea1d90800147c9ff475612914d03d8e50 Mon Sep 17 00:00:00 2001 From: y a t s <140337963+y-a-t-s@users.noreply.github.com> Date: Mon, 2 Dec 2024 15:29:12 -0500 Subject: [PATCH] Fix domain redirects Migrate to standalone kiwijar lib --- go.mod | 7 ++- go.sum | 2 + jar.go | 148 ------------------------------------------------ libkiwi.go | 35 +++++++----- libkiwi_test.go | 16 +++--- utils.go | 44 -------------- 6 files changed, 35 insertions(+), 217 deletions(-) delete mode 100644 jar.go delete mode 100644 utils.go diff --git a/go.mod b/go.mod index de5d566..226d781 100644 --- a/go.mod +++ b/go.mod @@ -1,8 +1,11 @@ module github.com/y-a-t-s/libkiwi -go 1.23.0 +go 1.23.3 -require github.com/y-a-t-s/firebird v0.0.0-20240927151147-c1c3219d176b +require ( + github.com/y-a-t-s/firebird v0.0.0-20240927151147-c1c3219d176b + github.com/y-a-t-s/kiwijar v0.0.0-20241202190418-813d7ca625d8 +) require ( github.com/klauspost/cpuid/v2 v2.2.3 // indirect diff --git a/go.sum b/go.sum index 4240de2..25eb5ae 100644 --- a/go.sum +++ b/go.sum @@ -4,6 +4,8 @@ github.com/minio/sha256-simd v1.0.1 h1:6kaan5IFmwTNynnKKpDHe6FWHohJOHhCPchzK49dz github.com/minio/sha256-simd v1.0.1/go.mod h1:Pz6AKMiUdngCLpeTL/RJY1M9rUuPMYujV5xJjtbRSN8= github.com/y-a-t-s/firebird v0.0.0-20240927151147-c1c3219d176b h1:zX9Hj9mK7cSExqPUxdO2LS0OsEpflRqGu94yC1BuDfU= github.com/y-a-t-s/firebird v0.0.0-20240927151147-c1c3219d176b/go.mod h1:aq9EHq1B6MDC0RdIRbcFHI5SsGaeztoMjL7tRdgJCAQ= +github.com/y-a-t-s/kiwijar v0.0.0-20241202190418-813d7ca625d8 h1:0PX2ayEHWoVubMT+IKdupf29q538szmBmdzCn30fJuA= +github.com/y-a-t-s/kiwijar v0.0.0-20241202190418-813d7ca625d8/go.mod h1:3FWsCf08sDaZGUA0yXpwWSvIfYUFvuOL7Q1fftseDjc= golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE= golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg= golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/jar.go b/jar.go deleted file mode 100644 index fdb2de5..0000000 --- a/jar.go +++ /dev/null @@ -1,148 +0,0 @@ -package libkiwi - -import ( - "fmt" - "net/http" - "net/url" - "sync" -) - -type cookieMap map[string]map[string]*http.Cookie - -// An http cookiejar implementation that doesn't suck ass. -type KiwiJar struct { - cookieMap - mutex sync.Mutex - - init func() -} - -func NewKiwiJar() *KiwiJar { - kj := new(KiwiJar) - kj.init = sync.OnceFunc(func() { - kj.cookieMap = make(cookieMap, 2) - }) - - return kj -} - -func (kj *KiwiJar) Cookies(u *url.URL) []*http.Cookie { - kj.newDomain(u) - - hn := u.Hostname() - res := make(chan []*http.Cookie, 1) - - go func() { - kj.mutex.Lock() - defer kj.mutex.Unlock() - - cs := make([]*http.Cookie, 0, len(kj.cookieMap[hn])) - for _, c := range kj.cookieMap[hn] { - cs = append(cs, c) - } - - res <- cs - }() - - return <-res -} - -func (kj *KiwiJar) ParseString(u *url.URL, cookies string) error { - if cookies == "" { - return nil - } - - cs, err := parseCookieString(cookies) - if err != nil { - return err - } - - kj.init() - kj.SetCookies(u, cs) - - return nil -} - -func (kj *KiwiJar) CookieString(u *url.URL) (cookies string) { - cs := kj.Cookies(u) - for _, c := range cs { - cookies += fmt.Sprintf("; %s=%s", c.Name, c.Value) - } - if len(cookies) > 2 { - // Remove leading semicolon+space. - cookies = cookies[2:] - } - - return -} - -func (kj *KiwiJar) GetCookie(u *url.URL, name string) *http.Cookie { - kj.newDomain(u) - - res := make(chan *http.Cookie, 1) - - go func() { - kj.mutex.Lock() - defer kj.mutex.Unlock() - - res <- kj.cookieMap[u.Hostname()][name] - }() - - return <-res -} - -func (kj *KiwiJar) set(u *url.URL, cookie *http.Cookie) { - kj.mutex.Lock() - defer kj.mutex.Unlock() - - kj.cookieMap[u.Hostname()][cookie.Name] = cookie -} - -func (kj *KiwiJar) SetCookie(u *url.URL, cookie *http.Cookie) { - kj.newDomain(u) - - done := make(chan bool, 1) - - go func() { - defer close(done) - kj.set(u, cookie) - }() - - <-done -} - -func (kj *KiwiJar) SetCookies(u *url.URL, cookies []*http.Cookie) { - kj.newDomain(u) - - var wg sync.WaitGroup - - for _, c := range cookies { - wg.Add(1) - go func() { - defer wg.Done() - kj.set(u, c) - }() - } - - wg.Wait() -} - -func (kj *KiwiJar) newDomain(u *url.URL) { - kj.init() - if kj.cookieMap[u.Hostname()] != nil { - return - } - - done := make(chan bool, 1) - - go func() { - defer close(done) - - kj.mutex.Lock() - defer kj.mutex.Unlock() - - kj.cookieMap[u.Hostname()] = make(map[string]*http.Cookie, 16) - }() - - <-done -} diff --git a/libkiwi.go b/libkiwi.go index 4cdf9cc..91bd63c 100644 --- a/libkiwi.go +++ b/libkiwi.go @@ -2,12 +2,13 @@ package libkiwi import ( "context" - "errors" "net/http" "net/url" "regexp" + "strings" "github.com/y-a-t-s/firebird" + "github.com/y-a-t-s/kiwijar" ) type KF struct { @@ -17,18 +18,14 @@ type KF struct { // Supply your own http.Client to route through any proxies. func NewKF(hc http.Client, host string, cookies string) (kf *KF, err error) { - _, host, err = splitProtocol(host) - if err != nil { - return - } - u, err := url.Parse("https://" + host) + u, err := parseHost(host) if err != nil { return } - jar := NewKiwiJar() + jar := kiwijar.KiwiJar{} jar.ParseString(u, cookies) - hc.Jar = jar + hc.Jar = &jar kf = &KF{ Client: hc, @@ -39,11 +36,6 @@ func NewKF(hc http.Client, host string, cookies string) (kf *KF, err error) { } func (kf *KF) GetPage(ctx context.Context, u *url.URL) (resp *http.Response, err error) { - if u == nil { - err = errors.New("Received nil URL.") - return - } - req, err := http.NewRequestWithContext(ctx, "GET", u.String(), nil) if err != nil { return @@ -53,6 +45,12 @@ func (kf *KF) GetPage(ctx context.Context, u *url.URL) (resp *http.Response, err if err != nil { return } + hn := resp.Request.URL.Hostname() + if hn != kf.domain.Hostname() { + jar := kf.Client.Jar.(*kiwijar.KiwiJar) + jar.SetCookies(resp.Request.URL, jar.Cookies(kf.domain)) + kf.domain.Host = hn + } // KiwiFlare redirect is signaled by 203 status. if resp.StatusCode == 203 { @@ -69,7 +67,7 @@ func (kf *KF) GetPage(ctx context.Context, u *url.URL) (resp *http.Response, err func (kf *KF) RefreshSession(ctx context.Context) (tk string, err error) { // Clear any existing session token to request a new one. - kf.Client.Jar.(*KiwiJar).SetCookie(kf.domain, &http.Cookie{ + kf.Client.Jar.(*kiwijar.KiwiJar).SetCookie(kf.domain, &http.Cookie{ Name: "xf_session", Value: "", }) @@ -100,3 +98,12 @@ func (kf *KF) solveKiwiFlare(ctx context.Context) error { return nil } + +func parseHost(host string) (*url.URL, error) { + // Try prepending protocol if it seems to be missing. + if !strings.Contains(strings.Split(host, "/")[0], "://") { + host = "https://" + host + } + + return url.Parse(host) +} diff --git a/libkiwi_test.go b/libkiwi_test.go index 7ed1d7f..4ec01e9 100644 --- a/libkiwi_test.go +++ b/libkiwi_test.go @@ -4,12 +4,13 @@ import ( "context" "log" "net/http" - "net/url" "os" "testing" + + "github.com/y-a-t-s/kiwijar" ) -const TEST_HOST = "kiwifarms.st" +const TEST_HOST = "kiwifarms.net" func TestGetPage(t *testing.T) { cookies := os.Getenv("TEST_COOKIES") @@ -28,12 +29,14 @@ func TestGetPage(t *testing.T) { } defer resp.Body.Close() - log.Printf("Response status code: %d\n", resp.StatusCode) + log.Printf("Response status code: %d\n\n", resp.StatusCode) for k, v := range resp.Header { if len(v) > 0 { log.Printf("%s: %s\n", k, v[0]) } } + log.Printf("Response host: %s\n\n", kf.domain) + log.Printf("Cookies: %s\n", kf.Client.Jar.(*kiwijar.KiwiJar).CookieString(kf.domain)) } func TestRefreshSession(t *testing.T) { @@ -62,10 +65,5 @@ func TestCookieString(t *testing.T) { t.Error(err) } - u, err := url.Parse("https://" + TEST_HOST) - if err != nil { - t.Error(err) - } - - log.Println("Cookies from jar: " + kf.Client.Jar.(*KiwiJar).CookieString(u)) + log.Println("Cookies from jar: " + kf.Client.Jar.(*kiwijar.KiwiJar).CookieString(kf.domain)) } diff --git a/utils.go b/utils.go deleted file mode 100644 index 0f13375..0000000 --- a/utils.go +++ /dev/null @@ -1,44 +0,0 @@ -package libkiwi - -import ( - "errors" - "net/http" - "regexp" - "strings" -) - -func parseCookieString(cookies string) ([]*http.Cookie, error) { - sp := strings.Split(cookies, "; ") - cs := make([]*http.Cookie, len(sp)) - - for i, c := range sp { - kv := strings.Split(c, "=") - if len(kv) != 2 { - return nil, errors.New("Invalid cookie string: " + cookies) - } - cs[i] = &http.Cookie{ - Name: kv[0], - Value: kv[1], - } - } - - return cs, nil -} - -func splitProtocol(addr string) (proto string, host string, err error) { - // FindStringSubmatch is used to capture the groups. - // Index 0 is the full matching string with all groups. - // The rest are numbered by the order of the opening parens. - // Here, we want the last 2 groups (indexes 1 and 2, requiring length 3). - tmp := regexp.MustCompile(`^([\w-]+://)?([^/]+)`).FindStringSubmatch(addr) - // At the very least, we need the hostname part (index 2). - if len(tmp) < 3 || tmp[2] == "" { - err = errors.New("Failed to parse address: " + addr) - return - } - - proto = tmp[1] - host = tmp[2] - - return -}