Merge pull request #809 from BSG-Walter/main

fix: resolve DuckDuckGo redirect URLs in HTML fallback search
This commit is contained in:
PewDiePie
2026-06-02 09:41:34 +09:00
committed by GitHub
2 changed files with 44 additions and 2 deletions
+23 -1
View File
@@ -4,6 +4,7 @@ import json
import logging
import os
from typing import List, Optional
from urllib.parse import urljoin, urlparse, parse_qs
import httpx
from bs4 import BeautifulSoup
@@ -300,6 +301,27 @@ def _brave_search_impl(query: str, count: int, time_filter: Optional[str] = None
def duckduckgo_search(query: str, count: int = 10, time_filter: Optional[str] = None) -> List[dict]:
"""Search using DuckDuckGo via the duckduckgo-search library. No API key needed."""
def _resolve_url(raw: str) -> str:
"""Resolve DuckDuckGo redirect URL to the actual destination URL."""
if not raw:
return raw
# Handle protocol-relative URLs
resolved = raw
if resolved.startswith("//"):
resolved = "https:" + resolved
elif resolved.startswith("/"):
resolved = urljoin("https://html.duckduckgo.com", resolved)
# Extract the actual URL from DuckDuckGo's /l/?uddg= redirect
try:
parsed = urlparse(resolved)
if "duckduckgo.com" in (parsed.hostname or "") and parsed.path.rstrip("/") == "/l":
qs = parse_qs(parsed.query)
if "uddg" in qs:
return qs["uddg"][0]
except Exception:
pass
return resolved
def _html_fallback() -> List[dict]:
try:
response = httpx.get(
@@ -315,7 +337,7 @@ def duckduckgo_search(query: str, count: int = 10, time_filter: Optional[str] =
link = result.select_one(".result__a")
if not link:
continue
url = link.get("href", "")
url = _resolve_url(link.get("href", ""))
if not url:
continue
snippet_el = result.select_one(".result__snippet")