From 6db32bbd200ceb47401a85b7bb066a73a0485fa1 Mon Sep 17 00:00:00 2001
From: occasionallydavid <62972099+occasionallydavid@users.noreply.github.com>
Date: Fri, 11 Apr 2025 02:40:36 +0100
Subject: [PATCH] Add support for scraping from --list ID

---
 README.md                  |  6 ++++++
 scraper/__main__.py        | 11 +++++++++++
 scraper/twitter_scraper.py | 17 +++++++++++++++++
 3 files changed, 34 insertions(+)

diff --git a/README.md b/README.md
index 8d6c068..f32ca95 100644
--- a/README.md
+++ b/README.md
@@ -103,6 +103,12 @@ options:                description
                             -ht javascript
                             --hashtag=javascript
 
+-l, --list              : List ID. Scrape tweets from a list. The ID is
+                          the number in the x.com/i/lists/<ID> URL.
+                          e.g.
+                           -l "1324132413151"
+                           --list "1324132413151"
+
 -q, --query             : Twitter query or search.
                           Scrape tweets from a query or search.
                           e.g.
diff --git a/scraper/__main__.py b/scraper/__main__.py
index c572467..89024cc 100644
--- a/scraper/__main__.py
+++ b/scraper/__main__.py
@@ -93,6 +93,14 @@ def main():
             help="Set no limit to the number of tweets to scrape (will scrap until no more tweets are available).",
         )
 
+        parser.add_argument(
+            "-l",
+            "--list",
+            type=str,
+            default=None,
+            help="List ID. Scrape tweets from a list.",
+        )
+
         parser.add_argument(
             "-q",
             "--query",
@@ -145,6 +153,8 @@ def main():
             tweet_type_args.append(args.username)
         if args.hashtag is not None:
             tweet_type_args.append(args.hashtag)
+        if args.list is not None:
+            tweet_type_args.append(args.list)
         if args.query is not None:
             tweet_type_args.append(args.query)
         if args.bookmarks is not False:
@@ -175,6 +185,7 @@ def main():
                 scrape_hashtag=args.hashtag,
                 scrape_bookmarks=args.bookmarks,
                 scrape_query=args.query,
+                scrape_list=args.list,
                 scrape_latest=args.latest,
                 scrape_top=args.top,
                 scrape_poster_details="pd" in additional_data,
diff --git a/scraper/twitter_scraper.py b/scraper/twitter_scraper.py
index 6595bc4..fd34e78 100644
--- a/scraper/twitter_scraper.py
+++ b/scraper/twitter_scraper.py
@@ -90,6 +90,7 @@ class Twitter_Scraper:
         scrape_hashtag=None,
         scrape_bookmarks=False,
         scrape_query=None,
+        scrape_list=None,
         scrape_latest=True,
         scrape_top=False,
         scrape_poster_details=False,
@@ -107,6 +108,7 @@ class Twitter_Scraper:
             else None,
             "bookmarks": scrape_bookmarks,
             "query": scrape_query,
+            "list": scrape_list,
             "tab": "Latest" if scrape_latest else "Top" if scrape_top else "Latest",
             "poster_details": scrape_poster_details,
         }
@@ -125,6 +127,9 @@ class Twitter_Scraper:
         elif scrape_query is not None:
             self.scraper_details["type"] = "Query"
             self.router = self.go_to_search
+        elif scrape_list is not None:
+            self.scraper_details["type"] = "List"
+            self.router = self.go_to_list
         else:
             self.scraper_details["type"] = "Home"
             self.router = self.go_to_home
@@ -383,6 +388,16 @@ It may be due to the following:
             sleep(3)
         pass
 
+    def go_to_list(self):
+        """Navigate the driver to the configured list page; exit if no ID is set."""
+        list_id = self.scraper_details["list"]
+        if list_id is None or list_id == "":
+            print("List is not set.")
+            sys.exit(1)
+        list_url = f"https://x.com/i/lists/{list_id}"
+        self.driver.get(list_url)
+        sleep(3)
+
     def get_tweet_cards(self):
         self.tweet_cards = self.driver.find_elements(
             "xpath", '//article[@data-testid="tweet" and not(@disabled)]'
@@ -411,6 +426,7 @@ It may be due to the following:
         scrape_hashtag=None,
         scrape_bookmarks=False,
         scrape_query=None,
+        scrape_list=None,
         scrape_latest=True,
         scrape_top=False,
         scrape_poster_details=False,
@@ -422,6 +438,7 @@ It may be due to the following:
             scrape_hashtag,
             scrape_bookmarks,
             scrape_query,
+            scrape_list,
             scrape_latest,
             scrape_top,
             scrape_poster_details,