Add support for scraping from --list ID
This commit is contained in:
@@ -103,6 +103,12 @@ options: description
|
|||||||
-ht javascript
|
-ht javascript
|
||||||
--hashtag=javascript
|
--hashtag=javascript
|
||||||
|
|
||||||
|
-l, --list : List ID. Scrape tweets from a list. The
|
||||||
|
ID is taken from the x.com/i/lists/... URL.
|
||||||
|
e.g.
|
||||||
|
-l "1324132413151"
|
||||||
|
--list "1324132413151"
|
||||||
|
|
||||||
-q, --query : Twitter query or search.
|
-q, --query : Twitter query or search.
|
||||||
Scrape tweets from a query or search.
|
Scrape tweets from a query or search.
|
||||||
e.g.
|
e.g.
|
||||||
|
|||||||
@@ -93,6 +93,14 @@ def main():
|
|||||||
help="Set no limit to the number of tweets to scrape (will scrap until no more tweets are available).",
|
help="Set no limit to the number of tweets to scrape (will scrap until no more tweets are available).",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"-l",
|
||||||
|
"--list",
|
||||||
|
type=str,
|
||||||
|
default=None,
|
||||||
|
help="List ID. Scrape tweets from a list.",
|
||||||
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-q",
|
"-q",
|
||||||
"--query",
|
"--query",
|
||||||
@@ -145,6 +153,8 @@ def main():
|
|||||||
tweet_type_args.append(args.username)
|
tweet_type_args.append(args.username)
|
||||||
if args.hashtag is not None:
|
if args.hashtag is not None:
|
||||||
tweet_type_args.append(args.hashtag)
|
tweet_type_args.append(args.hashtag)
|
||||||
|
if args.list is not None:
|
||||||
|
tweet_type_args.append(args.list)
|
||||||
if args.query is not None:
|
if args.query is not None:
|
||||||
tweet_type_args.append(args.query)
|
tweet_type_args.append(args.query)
|
||||||
if args.bookmarks is not False:
|
if args.bookmarks is not False:
|
||||||
@@ -175,6 +185,7 @@ def main():
|
|||||||
scrape_hashtag=args.hashtag,
|
scrape_hashtag=args.hashtag,
|
||||||
scrape_bookmarks=args.bookmarks,
|
scrape_bookmarks=args.bookmarks,
|
||||||
scrape_query=args.query,
|
scrape_query=args.query,
|
||||||
|
scrape_list=args.list,
|
||||||
scrape_latest=args.latest,
|
scrape_latest=args.latest,
|
||||||
scrape_top=args.top,
|
scrape_top=args.top,
|
||||||
scrape_poster_details="pd" in additional_data,
|
scrape_poster_details="pd" in additional_data,
|
||||||
|
|||||||
@@ -90,6 +90,7 @@ class Twitter_Scraper:
|
|||||||
scrape_hashtag=None,
|
scrape_hashtag=None,
|
||||||
scrape_bookmarks=False,
|
scrape_bookmarks=False,
|
||||||
scrape_query=None,
|
scrape_query=None,
|
||||||
|
scrape_list=None,
|
||||||
scrape_latest=True,
|
scrape_latest=True,
|
||||||
scrape_top=False,
|
scrape_top=False,
|
||||||
scrape_poster_details=False,
|
scrape_poster_details=False,
|
||||||
@@ -107,6 +108,7 @@ class Twitter_Scraper:
|
|||||||
else None,
|
else None,
|
||||||
"bookmarks": scrape_bookmarks,
|
"bookmarks": scrape_bookmarks,
|
||||||
"query": scrape_query,
|
"query": scrape_query,
|
||||||
|
"list": scrape_list,
|
||||||
"tab": "Latest" if scrape_latest else "Top" if scrape_top else "Latest",
|
"tab": "Latest" if scrape_latest else "Top" if scrape_top else "Latest",
|
||||||
"poster_details": scrape_poster_details,
|
"poster_details": scrape_poster_details,
|
||||||
}
|
}
|
||||||
@@ -125,6 +127,9 @@ class Twitter_Scraper:
|
|||||||
elif scrape_query is not None:
|
elif scrape_query is not None:
|
||||||
self.scraper_details["type"] = "Query"
|
self.scraper_details["type"] = "Query"
|
||||||
self.router = self.go_to_search
|
self.router = self.go_to_search
|
||||||
|
elif scrape_list is not None:
|
||||||
|
self.scraper_details["type"] = "List"
|
||||||
|
self.router = self.go_to_list
|
||||||
else:
|
else:
|
||||||
self.scraper_details["type"] = "Home"
|
self.scraper_details["type"] = "Home"
|
||||||
self.router = self.go_to_home
|
self.router = self.go_to_home
|
||||||
@@ -383,6 +388,16 @@ It may be due to the following:
|
|||||||
sleep(3)
|
sleep(3)
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def go_to_list(self):
|
||||||
|
if self.scraper_details["list"] is None or self.scraper_details["list"] == "":
|
||||||
|
print("List is not set.")
|
||||||
|
sys.exit(1)
|
||||||
|
else:
|
||||||
|
url = f"https://x.com/i/lists/{self.scraper_details['list']}"
|
||||||
|
self.driver.get(url)
|
||||||
|
sleep(3)
|
||||||
|
pass
|
||||||
|
|
||||||
def get_tweet_cards(self):
|
def get_tweet_cards(self):
|
||||||
self.tweet_cards = self.driver.find_elements(
|
self.tweet_cards = self.driver.find_elements(
|
||||||
"xpath", '//article[@data-testid="tweet" and not(@disabled)]'
|
"xpath", '//article[@data-testid="tweet" and not(@disabled)]'
|
||||||
@@ -411,6 +426,7 @@ It may be due to the following:
|
|||||||
scrape_hashtag=None,
|
scrape_hashtag=None,
|
||||||
scrape_bookmarks=False,
|
scrape_bookmarks=False,
|
||||||
scrape_query=None,
|
scrape_query=None,
|
||||||
|
scrape_list=None,
|
||||||
scrape_latest=True,
|
scrape_latest=True,
|
||||||
scrape_top=False,
|
scrape_top=False,
|
||||||
scrape_poster_details=False,
|
scrape_poster_details=False,
|
||||||
@@ -422,6 +438,7 @@ It may be due to the following:
|
|||||||
scrape_hashtag,
|
scrape_hashtag,
|
||||||
scrape_bookmarks,
|
scrape_bookmarks,
|
||||||
scrape_query,
|
scrape_query,
|
||||||
|
scrape_list,
|
||||||
scrape_latest,
|
scrape_latest,
|
||||||
scrape_top,
|
scrape_top,
|
||||||
scrape_poster_details,
|
scrape_poster_details,
|
||||||
|
|||||||
Reference in New Issue
Block a user