feat: scrape tweet link, tweet id, and user id

This commit is contained in:
Jarrian
2023-09-28 11:30:29 +08:00
parent e3960dedcc
commit 975b5197b8
2 changed files with 46 additions and 3 deletions

View File

@@ -148,8 +148,19 @@ class Tweet:
except NoSuchElementException:
self.profile_img = ""
try:
self.tweet_link = self.card.find_element(
"xpath",
".//a[contains(@href, '/status/')]",
).get_attribute("href")
self.tweet_id = str(self.tweet_link.split("/")[-1])
except NoSuchElementException:
self.tweet_link = ""
self.tweet_id = ""
self.following_cnt = "0"
self.followers_cnt = "0"
self.user_id = None
if scrape_poster_details:
el_name = card.find_element(
@@ -157,11 +168,17 @@ class Tweet:
)
ext_hover_card = False
ext_user_id = False
ext_following = False
ext_followers = False
hover_attempt = 0
while not ext_hover_card or not ext_following or not ext_followers:
while (
not ext_hover_card
or not ext_user_id
or not ext_following
or not ext_followers
):
try:
actions.move_to_element(el_name).perform()
@@ -171,6 +188,25 @@ class Tweet:
ext_hover_card = True
while not ext_user_id:
try:
raw_user_id = hover_card.find_element(
"xpath",
'(.//div[contains(@data-testid, "-follow")]) | (.//div[contains(@data-testid, "-unfollow")])',
).get_attribute("data-testid")
if raw_user_id == "":
self.user_id = None
else:
self.user_id = str(raw_user_id.split("-")[0])
ext_user_id = True
except NoSuchElementException:
continue
except StaleElementReferenceException:
self.error = True
return
while not ext_following:
try:
self.following_cnt = hover_card.find_element(
@@ -231,6 +267,9 @@ class Tweet:
self.mentions,
self.emojis,
self.profile_img,
self.tweet_link,
self.tweet_id,
self.user_id,
self.following_cnt,
self.followers_cnt,
)

View File

@@ -502,16 +502,20 @@ It may be due to the following:
"Mentions": [tweet[10] for tweet in self.data],
"Emojis": [tweet[11] for tweet in self.data],
"Profile Image": [tweet[12] for tweet in self.data],
"Tweet Link": [tweet[13] for tweet in self.data],
"Tweet ID": [f"tweet_id:{tweet[14]}" for tweet in self.data],
}
if self.scraper_details["poster_details"]:
data["Following"] = [tweet[13] for tweet in self.data]
data["Followers"] = [tweet[14] for tweet in self.data]
data["Tweeter ID"] = [f"user_id:{tweet[15]}" for tweet in self.data]
data["Following"] = [tweet[16] for tweet in self.data]
data["Followers"] = [tweet[17] for tweet in self.data]
df = pd.DataFrame(data)
current_time = now.strftime("%Y-%m-%d_%H-%M-%S")
file_path = f"{folder_path}{current_time}_tweets_1-{len(self.data)}.csv"
pd.set_option("display.max_colwidth", None)
df.to_csv(file_path, index=False, encoding="utf-8")
print("CSV Saved: {}".format(file_path))