mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-15 17:25:26 -04:00
4c41834dc7
Make src.youtube_handler a compatibility wrapper around services.youtube.youtube_handler so transcript state, URL parsing, and timeout behavior no longer diverge.
105 lines
4.5 KiB
Python
105 lines
4.5 KiB
Python
"""Regression: the YouTube handler must live in a single module.
|
|
|
|
Odysseus carried two independent copies of the handler — ``src.youtube_handler``
|
|
and ``services.youtube.youtube_handler`` — that silently drifted:
|
|
|
|
* ``app.py`` calls ``services.youtube.init_youtube()`` at startup, but the chat
|
|
flow imported ``extract_transcript_async`` from ``src.youtube_handler``. Those
|
|
were different module objects, so the ``YOUTUBE_AVAILABLE`` /
|
|
``YouTubeTranscriptApi`` globals set by ``init_youtube`` never reached the chat
|
|
path and transcript extraction always reported "not available".
|
|
* The comment-fetch timeout fix (PR #1002) landed only in the ``src`` copy.
|
|
|
|
These tests pin the two import paths to one module object and verify the shared
|
|
state and the broadened URL parsing.
|
|
"""
|
|
import sys
|
|
import types
|
|
|
|
import pytest
|
|
|
|
|
|
def test_src_and_service_youtube_are_same_module():
    """Check that the legacy and service import paths name one module object.

    If ``src.youtube_handler`` and ``services.youtube.youtube_handler`` were
    ever distinct objects again, their behavior and module-level state could
    drift apart; identity (not equality) is what rules that out.
    """
    import src.youtube_handler as legacy_path
    import services.youtube.youtube_handler as service_path

    assert legacy_path is service_path
|
|
|
|
|
|
def test_init_youtube_visible_through_chat_import_path(monkeypatch):
    """Check that state set by ``init_youtube()`` shows up on the chat path.

    ``init_youtube`` is reached through ``services.youtube`` (the way app.py
    calls it), while the chat flow reads the API globals through
    ``src.youtube_handler``. With a single consolidated module, whatever init
    sets must be observable through both import paths.
    """
    import src.youtube_handler as chat_path
    from services.youtube import init_youtube

    # Register both globals with monkeypatch so they are restored once the
    # test finishes, whether or not youtube_transcript_api is installed here.
    for global_name, pinned_value in (
        ("YOUTUBE_AVAILABLE", False),
        ("YouTubeTranscriptApi", None),
    ):
        monkeypatch.setattr(chat_path, global_name, pinned_value, raising=False)

    class _StubApi:
        pass

    # A fake transcript package lets init_youtube() succeed without needing
    # the real library or any network access.
    fake_package = types.ModuleType("youtube_transcript_api")
    fake_package.YouTubeTranscriptApi = _StubApi
    monkeypatch.setitem(sys.modules, "youtube_transcript_api", fake_package)

    init_youtube()  # called exactly the way app.py calls it

    assert chat_path.YOUTUBE_AVAILABLE is True
    assert chat_path.YouTubeTranscriptApi is _StubApi
|
|
|
|
|
|
# (url, expected) pairs for test_extract_youtube_id; an expected value of
# ``None`` means no video id should be extracted from that URL.
_YOUTUBE_ID_CASES = [
    # Plain watch links on the various youtube.com hosts.
    ("https://www.youtube.com/watch?v=dQw4w9WgXcQ", "dQw4w9WgXcQ"),
    ("https://youtube.com/watch?v=dQw4w9WgXcQ&t=42s", "dQw4w9WgXcQ"),
    ("https://m.youtube.com/watch?v=dQw4w9WgXcQ", "dQw4w9WgXcQ"),
    # YouTube Music uses the same paths, so it has to resolve as well.
    ("https://music.youtube.com/watch?v=dQw4w9WgXcQ", "dQw4w9WgXcQ"),
    # youtu.be short links, with and without a query string.
    ("https://youtu.be/dQw4w9WgXcQ", "dQw4w9WgXcQ"),
    ("https://youtu.be/dQw4w9WgXcQ?si=ab_cd", "dQw4w9WgXcQ"),
    # Player /embed/ links and the legacy /v/ embed form.
    ("https://www.youtube.com/embed/dQw4w9WgXcQ", "dQw4w9WgXcQ"),
    ("https://www.youtube.com/embed/dQw4w9WgXcQ/", "dQw4w9WgXcQ"),
    ("https://www.youtube.com/v/dQw4w9WgXcQ", "dQw4w9WgXcQ"),
    # Shorts and live links used to go unrecognized: the chat pipeline
    # kept them out of web-fetch because they are YouTube URLs, yet with
    # no id there was no transcript fetch either, so they were dropped.
    ("https://www.youtube.com/shorts/dQw4w9WgXcQ", "dQw4w9WgXcQ"),
    ("https://www.youtube.com/shorts/dQw4w9WgXcQ?feature=share", "dQw4w9WgXcQ"),
    ("https://www.youtube.com/live/dQw4w9WgXcQ", "dQw4w9WgXcQ"),
    # The host comparison ignores letter case.
    ("https://WWW.YouTube.com/watch?v=dQw4w9WgXcQ", "dQw4w9WgXcQ"),
    # Pages that are not videos, and hosts that are not YouTube, give no id.
    ("https://www.youtube.com/", None),
    ("https://www.youtube.com/feed/subscriptions", None),
    ("https://example.com/watch?v=dQw4w9WgXcQ", None),
    ("https://vimeo.com/76979871", None),
]


@pytest.mark.parametrize("url,expected", _YOUTUBE_ID_CASES)
def test_extract_youtube_id(url, expected):
    """Check ``extract_youtube_id`` against every supported URL shape.

    Args:
        url: The link handed to ``extract_youtube_id``.
        expected: The video id that must come back, or ``None`` when the
            link must not yield an id.
    """
    from src.youtube_handler import extract_youtube_id

    video_id = extract_youtube_id(url)
    assert video_id == expected
|
|
|
|
|
|
def test_shorts_url_is_recognized_and_extractable():
    """Check that a Shorts link is both recognized and yields a video id.

    Because a Shorts URL counts as a YouTube link, the chat pipeline keeps it
    out of generic web-fetch. If no id could be extracted from it, the video
    would be fetched by neither path and silently dropped.
    """
    from src.youtube_handler import extract_youtube_id, is_youtube_url

    shorts_link = "https://www.youtube.com/shorts/dQw4w9WgXcQ"

    assert is_youtube_url(shorts_link)
    assert extract_youtube_id(shorts_link) == "dQw4w9WgXcQ"
|