From f4d043bbfa43e7ba88976064d7b756bb9579ad32 Mon Sep 17 00:00:00 2001 From: Zach Brasseaux Date: Fri, 15 Nov 2024 15:09:01 -0500 Subject: [PATCH 1/3] add backend for twitter bookmarks archive --- scraper/twitter_scraper.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/scraper/twitter_scraper.py b/scraper/twitter_scraper.py index 4fd0fcd..c61d363 100644 --- a/scraper/twitter_scraper.py +++ b/scraper/twitter_scraper.py @@ -41,6 +41,7 @@ class Twitter_Scraper: scrape_username=None, scrape_hashtag=None, scrape_query=None, + scrape_bookmarks=False, scrape_poster_details=False, scrape_latest=True, scrape_top=False, @@ -58,6 +59,7 @@ class Twitter_Scraper: "type": None, "username": None, "hashtag": None, + "bookmarks": False, "query": None, "tab": None, "poster_details": False, @@ -72,6 +74,7 @@ class Twitter_Scraper: max_tweets, scrape_username, scrape_hashtag, + scrape_bookmarks, scrape_query, scrape_latest, scrape_top, @@ -83,6 +86,7 @@ class Twitter_Scraper: max_tweets=50, scrape_username=None, scrape_hashtag=None, + scrape_bookmarks=False, scrape_query=None, scrape_latest=True, scrape_top=False, @@ -99,6 +103,7 @@ class Twitter_Scraper: "hashtag": str(scrape_hashtag).replace("#", "") if scrape_hashtag is not None else None, + "bookmarks": scrape_bookmarks, "query": scrape_query, "tab": "Latest" if scrape_latest else "Top" if scrape_top else "Latest", "poster_details": scrape_poster_details, @@ -112,6 +117,9 @@ class Twitter_Scraper: elif scrape_hashtag is not None: self.scraper_details["type"] = "Hashtag" self.router = self.go_to_hashtag + elif scrape_bookmarks is not False: + self.scraper_details["type"] = "Bookmarks" + self.router = self.go_to_bookmarks elif scrape_query is not None: self.scraper_details["type"] = "Query" self.router = self.go_to_search @@ -120,6 +128,8 @@ class Twitter_Scraper: self.router = self.go_to_home pass + print(134) + def _get_driver( self, proxy=None, @@ -339,6 +349,21 @@ It may be due to the 
following: sleep(3) pass + def go_to_bookmarks(self): + print("356") + if ( + self.scraper_details["bookmarks"] is False + or self.scraper_details["bookmarks"] == "" + ): + print("Bookmarks is not set.") + sys.exit(1) + else: + url = "https://twitter.com/i/bookmarks" + + self.driver.get(url) + sleep(3) + pass + def go_to_search(self): if self.scraper_details["query"] is None or self.scraper_details["query"] == "": print("Query is not set.") @@ -378,6 +403,7 @@ It may be due to the following: no_tweets_limit=False, scrape_username=None, scrape_hashtag=None, + scrape_bookmarks=False, scrape_query=None, scrape_latest=True, scrape_top=False, @@ -388,6 +414,7 @@ It may be due to the following: max_tweets, scrape_username, scrape_hashtag, + scrape_bookmarks, scrape_query, scrape_latest, scrape_top, @@ -409,6 +436,9 @@ It may be due to the following: self.scraper_details["tab"], self.scraper_details["hashtag"] ) ) + elif self.scraper_details["type"] == "Bookmarks": + print( + "Scraping Tweets from @{} bookmarks...".format(self.scraper_details["username"])) elif self.scraper_details["type"] == "Query": print( "Scraping {} Tweets from {} search...".format( From 05937c05df89256714de59f27558894a5306157d Mon Sep 17 00:00:00 2001 From: Zach Brasseaux Date: Fri, 15 Nov 2024 15:09:21 -0500 Subject: [PATCH 2/3] add frontend for twitter bookmarks archive --- scraper/__main__.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/scraper/__main__.py b/scraper/__main__.py index 7676189..abe265b 100644 --- a/scraper/__main__.py +++ b/scraper/__main__.py @@ -72,6 +72,12 @@ def main(): help="Twitter hashtag. Scrape tweets from a hashtag.", ) + parser.add_argument( + "--bookmarks", + action='store_true', + help="Twitter bookmarks. 
Scrape tweets from your bookmarks.", + ) + parser.add_argument( "-ntl", "--no_tweets_limit", @@ -130,11 +136,13 @@ def main(): tweet_type_args.append(args.hashtag) if args.query is not None: tweet_type_args.append(args.query) + if args.bookmarks is not False: + tweet_type_args.append(args.bookmarks) additional_data: list = args.add.split(",") if len(tweet_type_args) > 1: - print("Please specify only one of --username, --hashtag, or --query.") + print("Please specify only one of --username, --hashtag, --bookmarks, or --query.") sys.exit(1) if args.latest and args.top: @@ -153,6 +161,7 @@ def main(): no_tweets_limit= args.no_tweets_limit if args.no_tweets_limit is not None else True, scrape_username=args.username, scrape_hashtag=args.hashtag, + scrape_bookmarks=args.bookmarks, scrape_query=args.query, scrape_latest=args.latest, scrape_top=args.top, From 338f4814b03a5f7a65e313b87d67309fe82c2d88 Mon Sep 17 00:00:00 2001 From: Zach Brasseaux Date: Fri, 15 Nov 2024 15:14:55 -0500 Subject: [PATCH 3/3] clean up debug statements --- scraper/twitter_scraper.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/scraper/twitter_scraper.py b/scraper/twitter_scraper.py index c61d363..aad37f5 100644 --- a/scraper/twitter_scraper.py +++ b/scraper/twitter_scraper.py @@ -128,8 +128,6 @@ class Twitter_Scraper: self.router = self.go_to_home pass - print(134) - def _get_driver( self, proxy=None, @@ -350,7 +348,6 @@ It may be due to the following: pass def go_to_bookmarks(self): - print("356") if ( self.scraper_details["bookmarks"] is False or self.scraper_details["bookmarks"] == "" @@ -438,7 +435,7 @@ It may be due to the following: ) elif self.scraper_details["type"] == "Bookmarks": print( - "Scraping Tweets from @{} bookmarks...".format(self.scraper_details["username"])) + "Scraping Tweets from bookmarks...") elif self.scraper_details["type"] == "Query": print( "Scraping {} Tweets from {} search...".format(