feat: scrape tweet link, tweet id, and user id
This commit is contained in:
@@ -148,8 +148,19 @@ class Tweet:
|
||||
except NoSuchElementException:
|
||||
self.profile_img = ""
|
||||
|
||||
try:
|
||||
self.tweet_link = self.card.find_element(
|
||||
"xpath",
|
||||
".//a[contains(@href, '/status/')]",
|
||||
).get_attribute("href")
|
||||
self.tweet_id = str(self.tweet_link.split("/")[-1])
|
||||
except NoSuchElementException:
|
||||
self.tweet_link = ""
|
||||
self.tweet_id = ""
|
||||
|
||||
self.following_cnt = "0"
|
||||
self.followers_cnt = "0"
|
||||
self.user_id = None
|
||||
|
||||
if scrape_poster_details:
|
||||
el_name = card.find_element(
|
||||
@@ -157,11 +168,17 @@ class Tweet:
|
||||
)
|
||||
|
||||
ext_hover_card = False
|
||||
ext_user_id = False
|
||||
ext_following = False
|
||||
ext_followers = False
|
||||
hover_attempt = 0
|
||||
|
||||
while not ext_hover_card or not ext_following or not ext_followers:
|
||||
while (
|
||||
not ext_hover_card
|
||||
or not ext_user_id
|
||||
or not ext_following
|
||||
or not ext_followers
|
||||
):
|
||||
try:
|
||||
actions.move_to_element(el_name).perform()
|
||||
|
||||
@@ -171,6 +188,25 @@ class Tweet:
|
||||
|
||||
ext_hover_card = True
|
||||
|
||||
while not ext_user_id:
|
||||
try:
|
||||
raw_user_id = hover_card.find_element(
|
||||
"xpath",
|
||||
'(.//div[contains(@data-testid, "-follow")]) | (.//div[contains(@data-testid, "-unfollow")])',
|
||||
).get_attribute("data-testid")
|
||||
|
||||
if raw_user_id == "":
|
||||
self.user_id = None
|
||||
else:
|
||||
self.user_id = str(raw_user_id.split("-")[0])
|
||||
|
||||
ext_user_id = True
|
||||
except NoSuchElementException:
|
||||
continue
|
||||
except StaleElementReferenceException:
|
||||
self.error = True
|
||||
return
|
||||
|
||||
while not ext_following:
|
||||
try:
|
||||
self.following_cnt = hover_card.find_element(
|
||||
@@ -231,6 +267,9 @@ class Tweet:
|
||||
self.mentions,
|
||||
self.emojis,
|
||||
self.profile_img,
|
||||
self.tweet_link,
|
||||
self.tweet_id,
|
||||
self.user_id,
|
||||
self.following_cnt,
|
||||
self.followers_cnt,
|
||||
)
|
||||
|
||||
@@ -502,16 +502,20 @@ It may be due to the following:
|
||||
"Mentions": [tweet[10] for tweet in self.data],
|
||||
"Emojis": [tweet[11] for tweet in self.data],
|
||||
"Profile Image": [tweet[12] for tweet in self.data],
|
||||
"Tweet Link": [tweet[13] for tweet in self.data],
|
||||
"Tweet ID": [f"tweet_id:{tweet[14]}" for tweet in self.data],
|
||||
}
|
||||
|
||||
if self.scraper_details["poster_details"]:
|
||||
data["Following"] = [tweet[13] for tweet in self.data]
|
||||
data["Followers"] = [tweet[14] for tweet in self.data]
|
||||
data["Tweeter ID"] = [f"user_id:{tweet[15]}" for tweet in self.data]
|
||||
data["Following"] = [tweet[16] for tweet in self.data]
|
||||
data["Followers"] = [tweet[17] for tweet in self.data]
|
||||
|
||||
df = pd.DataFrame(data)
|
||||
|
||||
current_time = now.strftime("%Y-%m-%d_%H-%M-%S")
|
||||
file_path = f"{folder_path}{current_time}_tweets_1-{len(self.data)}.csv"
|
||||
pd.set_option("display.max_colwidth", None)
|
||||
df.to_csv(file_path, index=False, encoding="utf-8")
|
||||
|
||||
print("CSV Saved: {}".format(file_path))
|
||||
|
||||
Reference in New Issue
Block a user