Merge branch 'godkingjay:master' into master

This commit is contained in:
Salastil
2025-04-21 18:56:02 -04:00
committed by GitHub
3 changed files with 34 additions and 0 deletions

View File

@@ -93,6 +93,14 @@ def main():
help="Set no limit to the number of tweets to scrape (will scrap until no more tweets are available).",
)
parser.add_argument(
"-l",
"--list",
type=str,
default=None,
help="List ID. Scrape tweets from a list.",
)
parser.add_argument(
"-q",
"--query",
@@ -145,6 +153,8 @@ def main():
tweet_type_args.append(args.username)
if args.hashtag is not None:
tweet_type_args.append(args.hashtag)
if args.list is not None:
tweet_type_args.append(args.list)
if args.query is not None:
tweet_type_args.append(args.query)
if args.bookmarks is not False:
@@ -175,6 +185,7 @@ def main():
scrape_hashtag=args.hashtag,
scrape_bookmarks=args.bookmarks,
scrape_query=args.query,
scrape_list=args.list,
scrape_latest=args.latest,
scrape_top=args.top,
scrape_poster_details="pd" in additional_data,

View File

@@ -90,6 +90,7 @@ class Twitter_Scraper:
scrape_hashtag=None,
scrape_bookmarks=False,
scrape_query=None,
scrape_list=None,
scrape_latest=True,
scrape_top=False,
scrape_poster_details=False,
@@ -107,6 +108,7 @@ class Twitter_Scraper:
else None,
"bookmarks": scrape_bookmarks,
"query": scrape_query,
"list": scrape_list,
"tab": "Latest" if scrape_latest else "Top" if scrape_top else "Latest",
"poster_details": scrape_poster_details,
}
@@ -125,6 +127,9 @@ class Twitter_Scraper:
elif scrape_query is not None:
self.scraper_details["type"] = "Query"
self.router = self.go_to_search
elif scrape_list is not None:
self.scraper_details["type"] = "List"
self.router = self.go_to_list
else:
self.scraper_details["type"] = "Home"
self.router = self.go_to_home
@@ -385,6 +390,16 @@ It may be due to the following:
sleep(3)
pass
def go_to_list(self):
    """Navigate the browser to the page of the configured list.

    The list ID is read from ``self.scraper_details["list"]``. When it is
    ``None`` or an empty string, a message is printed and the process is
    terminated through ``sys.exit(1)``. Otherwise the list URL is opened
    with ``self.driver.get`` and ``sleep(3)`` is called afterwards.
    """
    list_id = self.scraper_details["list"]

    # Guard clause: without a list ID there is nothing to navigate to.
    if list_id is None or list_id == "":
        print("List is not set.")
        sys.exit(1)

    self.driver.get(f"https://x.com/i/lists/{list_id}")
    # Fixed wait after navigation — presumably to let the page render; confirm.
    sleep(3)
def get_tweet_cards(self):
self.tweet_cards = self.driver.find_elements(
"xpath", '//article[@data-testid="tweet" and not(@disabled)]'
@@ -413,6 +428,7 @@ It may be due to the following:
scrape_hashtag=None,
scrape_bookmarks=False,
scrape_query=None,
scrape_list=None,
scrape_latest=True,
scrape_top=False,
scrape_poster_details=False,
@@ -424,6 +440,7 @@ It may be due to the following:
scrape_hashtag,
scrape_bookmarks,
scrape_query,
scrape_list,
scrape_latest,
scrape_top,
scrape_poster_details,