From f4d043bbfa43e7ba88976064d7b756bb9579ad32 Mon Sep 17 00:00:00 2001 From: Zach Brasseaux Date: Fri, 15 Nov 2024 15:09:01 -0500 Subject: [PATCH] add backend for twitter bookmarks archive --- scraper/twitter_scraper.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/scraper/twitter_scraper.py b/scraper/twitter_scraper.py index 4fd0fcd..c61d363 100644 --- a/scraper/twitter_scraper.py +++ b/scraper/twitter_scraper.py @@ -41,6 +41,7 @@ class Twitter_Scraper: scrape_username=None, scrape_hashtag=None, scrape_query=None, + scrape_bookmarks=False, scrape_poster_details=False, scrape_latest=True, scrape_top=False, @@ -58,6 +59,7 @@ class Twitter_Scraper: "type": None, "username": None, "hashtag": None, + "bookmarks": False, "query": None, "tab": None, "poster_details": False, @@ -72,6 +74,7 @@ class Twitter_Scraper: max_tweets, scrape_username, scrape_hashtag, + scrape_bookmarks, scrape_query, scrape_latest, scrape_top, @@ -83,6 +86,7 @@ class Twitter_Scraper: max_tweets=50, scrape_username=None, scrape_hashtag=None, + scrape_bookmarks=False, scrape_query=None, scrape_latest=True, scrape_top=False, @@ -99,6 +103,7 @@ class Twitter_Scraper: "hashtag": str(scrape_hashtag).replace("#", "") if scrape_hashtag is not None else None, + "bookmarks": scrape_bookmarks, "query": scrape_query, "tab": "Latest" if scrape_latest else "Top" if scrape_top else "Latest", "poster_details": scrape_poster_details, @@ -112,6 +117,9 @@ class Twitter_Scraper: elif scrape_hashtag is not None: self.scraper_details["type"] = "Hashtag" self.router = self.go_to_hashtag + elif scrape_bookmarks is not False: + self.scraper_details["type"] = "Bookmarks" + self.router = self.go_to_bookmarks elif scrape_query is not None: self.scraper_details["type"] = "Query" self.router = self.go_to_search @@ -120,6 +128,8 @@ class Twitter_Scraper: self.router = self.go_to_home pass + print(134) + def _get_driver( self, proxy=None, @@ -339,6 +349,21 @@ It may be due to the following: 
sleep(3) pass + def go_to_bookmarks(self): + """Load the Twitter bookmarks page, or exit with status 1 if bookmarks scraping is not enabled.""" + if ( + self.scraper_details["bookmarks"] is False + or self.scraper_details["bookmarks"] == "" + ): + print("Bookmarks is not set.") + sys.exit(1) + else: + url = "https://twitter.com/i/bookmarks" + + self.driver.get(url) + sleep(3) + pass + def go_to_search(self): if self.scraper_details["query"] is None or self.scraper_details["query"] == "": print("Query is not set.") @@ -378,6 +403,7 @@ It may be due to the following: no_tweets_limit=False, scrape_username=None, scrape_hashtag=None, + scrape_bookmarks=False, scrape_query=None, scrape_latest=True, scrape_top=False, @@ -388,6 +414,7 @@ It may be due to the following: max_tweets, scrape_username, scrape_hashtag, + scrape_bookmarks, scrape_query, scrape_latest, scrape_top, @@ -409,6 +436,9 @@ It may be due to the following: self.scraper_details["tab"], self.scraper_details["hashtag"] ) ) + elif self.scraper_details["type"] == "Bookmarks": + print( + "Scraping Tweets from @{} bookmarks...".format(self.scraper_details["username"])) elif self.scraper_details["type"] == "Query": print( "Scraping {} Tweets from {} search...".format(