1
0
mirror of https://github.com/zedeus/nitter.git synced 2025-12-06 03:55:36 -05:00

3 Commits

Author SHA1 Message Date
Zed
3f3196d103 Fix broken UserMedia photo rail parsing
Fixes #1307
2025-11-17 00:14:52 +01:00
Zed
68fc7b71c8 Fix media support for cookie sessions
Fixes #1304
2025-11-16 23:22:21 +01:00
0xCathiefish
55d4469401 fix: correct argument parsing for --append flag in get_web_session.py (#1305)
* fix: correct argument parsing for --append flag

* fix: strip quotes from extracted user_id in twid cookie
2025-11-16 21:18:38 +01:00
7 changed files with 216 additions and 86 deletions

View File

@@ -4,6 +4,15 @@ import packedjson
import types, query, formatters, consts, apiutils, parser
import experimental/parser as newParser
proc mediaUrl(id: string; cursor: string): SessionAwareUrl =
let
cookieVariables = userMediaVariables % [id, cursor]
oauthVariables = userTweetsVariables % [id, cursor]
result = SessionAwareUrl(
cookieUrl: graphUserMedia ? {"variables": cookieVariables, "features": gqlFeatures},
oauthUrl: graphUserMediaV2 ? {"variables": oauthVariables, "features": gqlFeatures}
)
proc getGraphUser*(username: string): Future[User] {.async.} =
if username.len == 0: return
let
@@ -26,12 +35,14 @@ proc getGraphUserTweets*(id: string; kind: TimelineKind; after=""): Future[Profi
cursor = if after.len > 0: "\"cursor\":\"$1\"," % after else: ""
variables = userTweetsVariables % [id, cursor]
params = {"variables": variables, "features": gqlFeatures}
(url, apiId) = case kind
of TimelineKind.tweets: (graphUserTweets, Api.userTweets)
of TimelineKind.replies: (graphUserTweetsAndReplies, Api.userTweetsAndReplies)
of TimelineKind.media: (graphUserMedia, Api.userMedia)
js = await fetch(url ? params, apiId)
result = parseGraphTimeline(js, "user", after)
js = case kind
of TimelineKind.tweets:
await fetch(graphUserTweets ? params, Api.userTweets)
of TimelineKind.replies:
await fetch(graphUserTweetsAndReplies ? params, Api.userTweetsAndReplies)
of TimelineKind.media:
await fetch(mediaUrl(id, cursor), Api.userMedia)
result = parseGraphTimeline(js, after)
proc getGraphListTweets*(id: string; after=""): Future[Timeline] {.async.} =
if id.len == 0: return
@@ -40,19 +51,21 @@ proc getGraphListTweets*(id: string; after=""): Future[Timeline] {.async.} =
variables = listTweetsVariables % [id, cursor]
params = {"variables": variables, "features": gqlFeatures}
js = await fetch(graphListTweets ? params, Api.listTweets)
result = parseGraphTimeline(js, "list", after).tweets
result = parseGraphTimeline(js, after).tweets
proc getGraphListBySlug*(name, list: string): Future[List] {.async.} =
let
variables = %*{"screenName": name, "listSlug": list}
params = {"variables": $variables, "features": gqlFeatures}
result = parseGraphList(await fetch(graphListBySlug ? params, Api.listBySlug))
url = graphListBySlug ? params
result = parseGraphList(await fetch(url, Api.listBySlug))
proc getGraphList*(id: string): Future[List] {.async.} =
let
variables = """{"listId": "$1"}""" % id
params = {"variables": variables, "features": gqlFeatures}
result = parseGraphList(await fetch(graphListById ? params, Api.list))
url = graphListById ? params
result = parseGraphList(await fetch(url, Api.list))
proc getGraphListMembers*(list: List; after=""): Future[Result[User]] {.async.} =
if list.id.len == 0: return
@@ -138,11 +151,8 @@ proc getGraphUserSearch*(query: Query; after=""): Future[Result[User]] {.async.}
proc getPhotoRail*(id: string): Future[PhotoRail] {.async.} =
if id.len == 0: return
let
variables = userTweetsVariables % [id, ""]
params = {"variables": variables, "features": gqlFeatures}
url = graphUserMedia ? params
result = parseGraphPhotoRail(await fetch(url, Api.userMedia))
let js = await fetch(mediaUrl(id, ""), Api.userMedia)
result = parseGraphPhotoRail(js)
proc resolve*(url: string; prefs: Prefs): Future[string] {.async.} =
let client = newAsyncHttpClient(maxRedirects=0)

View File

@@ -55,21 +55,22 @@ proc genHeaders*(session: Session, url: string): HttpHeaders =
result["x-csrf-token"] = session.ct0
result["cookie"] = getCookieHeader(session.authToken, session.ct0)
proc getAndValidateSession*(api: Api): Future[Session] {.async.} =
result = await getSession(api)
case result.kind
of SessionKind.oauth:
if result.oauthToken.len == 0:
echo "[sessions] Empty oauth token, session: ", result.id
raise rateLimitError()
of SessionKind.cookie:
if result.authToken.len == 0 or result.ct0.len == 0:
echo "[sessions] Empty cookie credentials, session: ", result.id
raise rateLimitError()
template fetchImpl(result, fetchBody) {.dirty.} =
once:
pool = HttpPool()
var session = await getSession(api)
case session.kind
of SessionKind.oauth:
if session.oauthToken.len == 0:
echo "[sessions] Empty oauth token, session: ", session.id
raise rateLimitError()
of SessionKind.cookie:
if session.authToken.len == 0 or session.ct0.len == 0:
echo "[sessions] Empty cookie credentials, session: ", session.id
raise rateLimitError()
try:
var resp: AsyncResponse
pool.use(genHeaders(session, $url)):
@@ -136,9 +137,17 @@ template retry(bod) =
echo "[sessions] Rate limited, retrying ", api, " request..."
bod
proc fetch*(url: Uri; api: Api): Future[JsonNode] {.async.} =
proc fetch*(url: Uri | SessionAwareUrl; api: Api): Future[JsonNode] {.async.} =
retry:
var body: string
var
body: string
session = await getAndValidateSession(api)
when url is SessionAwareUrl:
let url = case session.kind
of SessionKind.oauth: url.oauthUrl
of SessionKind.cookie: url.cookieUrl
fetchImpl body:
if body.startsWith('{') or body.startsWith('['):
result = parseJson(body)
@@ -153,8 +162,15 @@ proc fetch*(url: Uri; api: Api): Future[JsonNode] {.async.} =
invalidate(session)
raise rateLimitError()
proc fetchRaw*(url: Uri; api: Api): Future[string] {.async.} =
proc fetchRaw*(url: Uri | SessionAwareUrl; api: Api): Future[string] {.async.} =
retry:
var session = await getAndValidateSession(api)
when url is SessionAwareUrl:
let url = case session.kind
of SessionKind.oauth: url.oauthUrl
of SessionKind.cookie: url.cookieUrl
fetchImpl result:
if not (result.startsWith('{') or result.startsWith('[')):
echo resp.status, ": ", result, " --- url: ", url

View File

@@ -11,7 +11,8 @@ const
graphUserById* = gql / "oPppcargziU1uDQHAUmH-A/UserResultByIdQuery"
graphUserTweets* = gql / "JLApJKFY0MxGTzCoK6ps8Q/UserWithProfileTweetsQueryV2"
graphUserTweetsAndReplies* = gql / "Y86LQY7KMvxn5tu3hFTyPg/UserWithProfileTweetsAndRepliesQueryV2"
graphUserMedia* = gql / "PDfFf8hGeJvUCiTyWtw4wQ/MediaTimelineV2"
graphUserMedia* = gql / "36oKqyQ7E_9CmtONGjJRsA/UserMedia"
graphUserMediaV2* = gql / "PDfFf8hGeJvUCiTyWtw4wQ/MediaTimelineV2"
graphTweet* = gql / "Vorskcd2tZ-tc4Gx3zbk4Q/ConversationTimelineV2"
graphTweetResult* = gql / "sITyJdhRPpvpEjg4waUmTA/TweetResultByIdQuery"
graphSearchTimeline* = gql / "KI9jCXUx3Ymt-hDKLOZb9Q/SearchTimeline"
@@ -25,28 +26,28 @@ const
"blue_business_profile_image_shape_enabled": false,
"creator_subscriptions_subscription_count_enabled": false,
"creator_subscriptions_tweet_preview_api_enabled": true,
"freedom_of_speech_not_reach_fetch_enabled": false,
"graphql_is_translatable_rweb_tweet_is_translatable_enabled": false,
"freedom_of_speech_not_reach_fetch_enabled": true,
"graphql_is_translatable_rweb_tweet_is_translatable_enabled": true,
"hidden_profile_likes_enabled": false,
"highlights_tweets_tab_ui_enabled": false,
"interactive_text_enabled": false,
"longform_notetweets_consumption_enabled": true,
"longform_notetweets_inline_media_enabled": false,
"longform_notetweets_inline_media_enabled": true,
"longform_notetweets_richtext_consumption_enabled": true,
"longform_notetweets_rich_text_read_enabled": false,
"responsive_web_edit_tweet_api_enabled": false,
"longform_notetweets_rich_text_read_enabled": true,
"responsive_web_edit_tweet_api_enabled": true,
"responsive_web_enhance_cards_enabled": false,
"responsive_web_graphql_exclude_directive_enabled": true,
"responsive_web_graphql_skip_user_profile_image_extensions_enabled": false,
"responsive_web_graphql_timeline_navigation_enabled": false,
"responsive_web_graphql_timeline_navigation_enabled": true,
"responsive_web_media_download_video_enabled": false,
"responsive_web_text_conversations_enabled": false,
"responsive_web_twitter_article_tweet_consumption_enabled": false,
"responsive_web_twitter_article_tweet_consumption_enabled": true,
"responsive_web_twitter_blue_verified_badge_is_enabled": true,
"rweb_lists_timeline_redesign_enabled": true,
"spaces_2022_h2_clipping": true,
"spaces_2022_h2_spaces_communities": true,
"standardized_nudges_misinfo": false,
"standardized_nudges_misinfo": true,
"subscriptions_verification_info_enabled": true,
"subscriptions_verification_info_reason_enabled": true,
"subscriptions_verification_info_verified_since_enabled": true,
@@ -55,28 +56,34 @@ const
"super_follow_tweet_api_enabled": false,
"super_follow_user_api_enabled": false,
"tweet_awards_web_tipping_enabled": false,
"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": false,
"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": true,
"tweetypie_unmention_optimization_enabled": false,
"unified_cards_ad_metadata_container_dynamic_card_content_query_enabled": false,
"verified_phone_label_enabled": false,
"vibe_api_enabled": false,
"view_counts_everywhere_api_enabled": false,
"view_counts_everywhere_api_enabled": true,
"premium_content_api_read_enabled": false,
"communities_web_enable_tweet_community_results_fetch": false,
"responsive_web_jetfuel_frame": false,
"communities_web_enable_tweet_community_results_fetch": true,
"responsive_web_jetfuel_frame": true,
"responsive_web_grok_analyze_button_fetch_trends_enabled": false,
"responsive_web_grok_image_annotation_enabled": false,
"rweb_tipjar_consumption_enabled": false,
"profile_label_improvements_pcf_label_in_post_enabled": false,
"responsive_web_grok_image_annotation_enabled": true,
"responsive_web_grok_imagine_annotation_enabled": true,
"rweb_tipjar_consumption_enabled": true,
"profile_label_improvements_pcf_label_in_post_enabled": true,
"creator_subscriptions_quote_tweet_preview_enabled": false,
"c9s_tweet_anatomy_moderator_badge_enabled": false,
"responsive_web_grok_analyze_post_followups_enabled": false,
"c9s_tweet_anatomy_moderator_badge_enabled": true,
"responsive_web_grok_analyze_post_followups_enabled": true,
"rweb_video_timestamps_enabled": false,
"responsive_web_grok_share_attachment_enabled": false,
"articles_preview_enabled": false,
"responsive_web_grok_share_attachment_enabled": true,
"articles_preview_enabled": true,
"immersive_video_status_linkable_timestamps": false,
"articles_api_enabled": false,
"responsive_web_grok_analysis_button_from_backend": false
"responsive_web_grok_analysis_button_from_backend": true,
"rweb_video_screen_enabled": false,
"payments_enabled": false,
"responsive_web_profile_redirect_enabled": false,
"responsive_web_grok_show_grok_translated_post": false,
"responsive_web_grok_community_note_auto_translation_is_enabled": false
}""".replace(" ", "").replace("\n", "")
tweetVariables* = """{
@@ -110,3 +117,12 @@ const
"rest_id": "$1", $2
"count": 20
}"""
userMediaVariables* = """{
"userId": "$1", $2
"count": 20,
"includePromotedContent": false,
"withClientEventToken": false,
"withBirdwatchNotes": false,
"withVoice": true
}""".replace(" ", "").replace("\n", "")

View File

@@ -32,10 +32,26 @@ proc parseGraphUser(js: JsonNode): User =
var user = js{"user_result", "result"}
if user.isNull:
user = ? js{"user_results", "result"}
if user.isNull:
if js{"core"}.notNull and js{"legacy"}.notNull:
user = js
else:
return
result = parseUser(user{"legacy"}, user{"rest_id"}.getStr)
if result.verifiedType == VerifiedType.none and user{"is_blue_verified"}.getBool(false):
result.verifiedType = blue
# fallback to support UserMedia/recent GraphQL updates
if result.username.len == 0 and user{"core", "screen_name"}.notNull:
result.username = user{"core", "screen_name"}.getStr
result.fullname = user{"core", "name"}.getStr
result.userPic = user{"avatar", "image_url"}.getImageStr.replace("_normal", "")
if user{"is_blue_verified"}.getBool(false):
result.verifiedType = blue
elif user{"verification", "verified_type"}.notNull:
let verifiedType = user{"verification", "verified_type"}.getStr("None")
result.verifiedType = parseEnum[VerifiedType](verifiedType)
proc parseGraphList*(js: JsonNode): List =
if js.isNull: return
@@ -400,31 +416,59 @@ proc parseGraphConversation*(js: JsonNode; tweetId: string; v2=true): Conversati
elif entryId.startsWith("cursor-bottom"):
result.replies.bottom = e{"content", contentKey, "value"}.getStr
proc parseGraphTimeline*(js: JsonNode; root: string; after=""): Profile =
proc extractTweetsFromEntry*(e: JsonNode; entryId: string): seq[Tweet] =
if e{"content", "items"}.notNull:
for item in e{"content", "items"}:
with tweetResult, item{"item", "itemContent", "tweet_results", "result"}:
var tweet = parseGraphTweet(tweetResult, false)
if not tweet.available:
tweet.id = parseBiggestInt(item{"entryId"}.getStr.getId())
result.add tweet
return
with tweetResult, e{"content", "content", "tweetResult", "result"}:
var tweet = parseGraphTweet(tweetResult, false)
if not tweet.available:
tweet.id = parseBiggestInt(entryId.getId())
result.add tweet
proc parseGraphTimeline*(js: JsonNode; after=""): Profile =
result = Profile(tweets: Timeline(beginning: after.len == 0))
let instructions =
if root == "list": ? js{"data", "list", "timeline_response", "timeline", "instructions"}
else: ? js{"data", "user_result", "result", "timeline_response", "timeline", "instructions"}
if js{"data", "list"}.notNull:
? js{"data", "list", "timeline_response", "timeline", "instructions"}
elif js{"data", "user"}.notNull:
? js{"data", "user", "result", "timeline", "timeline", "instructions"}
else:
? js{"data", "user_result", "result", "timeline_response", "timeline", "instructions"}
if instructions.len == 0:
return
for i in instructions:
if i{"__typename"}.getStr == "TimelineAddEntries":
# TimelineAddToModule instruction is used by UserMedia
if i{"moduleItems"}.notNull:
for item in i{"moduleItems"}:
with tweetResult, item{"item", "itemContent", "tweet_results", "result"}:
let tweet = parseGraphTweet(tweetResult, false)
if not tweet.available:
tweet.id = parseBiggestInt(item{"entryId"}.getStr.getId())
result.tweets.content.add tweet
continue
if i{"entries"}.notNull:
for e in i{"entries"}:
let entryId = e{"entryId"}.getStr
if entryId.startsWith("tweet"):
with tweetResult, e{"content", "content", "tweetResult", "result"}:
let tweet = parseGraphTweet(tweetResult, false)
if not tweet.available:
tweet.id = parseBiggestInt(entryId.getId())
if entryId.startsWith("tweet") or entryId.startsWith("profile-grid"):
for tweet in extractTweetsFromEntry(e, entryId):
result.tweets.content.add tweet
elif "-conversation-" in entryId or entryId.startsWith("homeConversation"):
let (thread, self) = parseGraphThread(e)
result.tweets.content.add thread.content
elif entryId.startsWith("cursor-bottom"):
result.tweets.bottom = e{"content", "value"}.getStr
if after.len == 0 and i{"__typename"}.getStr == "TimelinePinEntry":
with tweetResult, i{"entry", "content", "content", "tweetResult", "result"}:
let tweet = parseGraphTweet(tweetResult, false)
@@ -439,30 +483,45 @@ proc parseGraphPhotoRail*(js: JsonNode): PhotoRail =
result = @[]
let instructions =
? js{"data", "user_result", "result", "timeline_response", "timeline", "instructions"}
if js{"data", "user"}.notNull:
? js{"data", "user", "result", "timeline", "timeline", "instructions"}
else:
? js{"data", "user_result", "result", "timeline_response", "timeline", "instructions"}
if instructions.len == 0:
return
for i in instructions:
if i{"__typename"}.getStr == "TimelineAddEntries":
for e in i{"entries"}:
let entryId = e{"entryId"}.getStr
if entryId.startsWith("tweet"):
with tweetResult, e{"content", "content", "tweetResult", "result"}:
let t = parseGraphTweet(tweetResult, false)
if not t.available:
t.id = parseBiggestInt(entryId.getId())
# TimelineAddToModule instruction is used by MediaTimelineV2
if i{"moduleItems"}.notNull:
for item in i{"moduleItems"}:
with tweetResult, item{"item", "itemContent", "tweet_results", "result"}:
let t = parseGraphTweet(tweetResult, false)
if not t.available:
t.id = parseBiggestInt(item{"entryId"}.getStr.getId())
let url =
if t.photos.len > 0: t.photos[0]
elif t.video.isSome: get(t.video).thumb
elif t.gif.isSome: get(t.gif).thumb
elif t.card.isSome: get(t.card).image
else: ""
let photo = extractGalleryPhoto(t)
if photo.url.len > 0:
result.add photo
if url.len > 0:
result.add GalleryPhoto(url: url, tweetId: $t.id)
if result.len == 16:
return
continue
if result.len == 16:
break
let instrType = i{"type"}.getStr(i{"__typename"}.getStr)
if instrType != "TimelineAddEntries":
continue
for e in i{"entries"}:
let entryId = e{"entryId"}.getStr
if entryId.startsWith("tweet") or entryId.startsWith("profile-grid"):
for t in extractTweetsFromEntry(e, entryId):
let photo = extractGalleryPhoto(t)
if photo.url.len > 0:
result.add photo
if result.len == 16:
return
proc parseGraphSearch*[T: User | Tweets](js: JsonNode; after=""): Result[T] =
result = Result[T](beginning: after.len == 0)

View File

@@ -319,3 +319,13 @@ proc expandNoteTweetEntities*(tweet: Tweet; js: JsonNode) =
tweet.expandTextEntities(entities, text, textSlice)
tweet.text = tweet.text.multiReplace((unicodeOpen, xmlOpen), (unicodeClose, xmlClose))
proc extractGalleryPhoto*(t: Tweet): GalleryPhoto =
let url =
if t.photos.len > 0: t.photos[0]
elif t.video.isSome: get(t.video).thumb
elif t.gif.isSome: get(t.gif).thumb
elif t.card.isSome: get(t.card).image
else: ""
result = GalleryPhoto(url: url, tweetId: $t.id)

View File

@@ -1,5 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-only
import times, sequtils, options, tables
import times, sequtils, options, tables, uri
import prefs_impl
genPrefsType()
@@ -50,6 +50,10 @@ type
authToken*: string
ct0*: string
SessionAwareUrl* = object
oauthUrl*: Uri
cookieUrl*: Uri
Error* = enum
null = 0
noUserMatches = 17

View File

@@ -28,6 +28,7 @@ import json
import asyncio
import pyotp
import nodriver as uc
import os
async def login_and_get_cookies(username, password, totp_seed=None, headless=False):
@@ -76,9 +77,9 @@ async def login_and_get_cookies(username, password, totp_seed=None, headless=Fal
twid = cookies_dict['twid']
# Try to extract the ID from twid (format: u%3D<id> or u=<id>)
if 'u%3D' in twid:
user_id = twid.split('u%3D')[1].split('&')[0]
user_id = twid.split('u%3D')[1].split('&')[0].strip('"')
elif 'u=' in twid:
user_id = twid.split('u=')[1].split('&')[0]
user_id = twid.split('u=')[1].split('&')[0].strip('"')
cookies_dict['username'] = username
if user_id:
@@ -106,13 +107,27 @@ async def main():
headless = False
# Parse optional arguments
for i, arg in enumerate(sys.argv[3:], 3):
if arg == '--append' and i + 1 < len(sys.argv):
append_file = sys.argv[i + 1]
i = 3
while i < len(sys.argv):
arg = sys.argv[i]
if arg == '--append':
if i + 1 < len(sys.argv):
append_file = sys.argv[i + 1]
i += 2 # Skip '--append' and filename
else:
print('[!] Error: --append requires a filename', file=sys.stderr)
sys.exit(1)
elif arg == '--headless':
headless = True
i += 1
elif not arg.startswith('--'):
totp_seed = arg
if totp_seed is None:
totp_seed = arg
i += 1
else:
# Unkown args
print(f'[!] Warning: Unknown argument: {arg}', file=sys.stderr)
i += 1
try:
cookies = await login_and_get_cookies(username, password, totp_seed, headless)