feat: scrape mentions and emojis

This commit is contained in:
Jarrian
2023-09-23 10:00:17 +08:00
parent c896baa165
commit 407a717438
3 changed files with 84 additions and 29 deletions

View File

@@ -77,13 +77,6 @@ class Tweet:
except NoSuchElementException:
self.analytics_cnt = "0"
try:
self.profile_img = card.find_element(
"xpath", './/div[@data-testid="Tweet-User-Avatar"]//img'
).get_attribute("src")
except NoSuchElementException:
self.profile_img = ""
try:
self.tags = card.find_elements(
"xpath",
@@ -94,6 +87,36 @@ class Tweet:
except NoSuchElementException:
self.tags = []
try:
self.mentions = card.find_elements(
"xpath",
'(.//div[@data-testid="tweetText"])[1]//a[contains(text(), "@")]',
)
self.mentions = [mention.text for mention in self.mentions]
except NoSuchElementException:
self.mentions = []
try:
raw_emojis = card.find_elements(
"xpath",
'(.//div[@data-testid="tweetText"])[1]/img[contains(@src, "emoji")]',
)
self.emojis = [
emoji.get_attribute("alt").encode("unicode-escape").decode("ASCII")
for emoji in raw_emojis
]
except NoSuchElementException:
self.emojis = []
try:
self.profile_img = card.find_element(
"xpath", './/div[@data-testid="Tweet-User-Avatar"]//img'
).get_attribute("src")
except NoSuchElementException:
self.profile_img = ""
self.tweet = (
self.user,
self.handle,
@@ -105,6 +128,8 @@ class Tweet:
self.like_cnt,
self.analytics_cnt,
self.tags,
self.mentions,
self.emojis,
self.profile_img,
)

View File

@@ -88,6 +88,7 @@ class Twitter_Scraper:
"tab": "Latest" if scrape_latest else "Top" if scrape_top else "Latest",
}
self.router = self.go_to_home
self.scroller = Scroller(self.driver)
if scrape_username is not None:
self.scraper_details["type"] = "Username"
@@ -462,14 +463,16 @@ It may be due to the following:
"Likes": [tweet[7] for tweet in self.data],
"Analytics": [tweet[8] for tweet in self.data],
"Tags": [tweet[9] for tweet in self.data],
"Profile Image": [tweet[10] for tweet in self.data],
"Mentions": [tweet[10] for tweet in self.data],
"Emojis": [tweet[11] for tweet in self.data],
"Profile Image": [tweet[12] for tweet in self.data],
}
df = pd.DataFrame(data)
current_time = now.strftime("%Y-%m-%d_%H-%M-%S")
file_path = f"{folder_path}{current_time}_tweets_1-{len(self.data)}.csv"
df.to_csv(file_path, index=False)
df.to_csv(file_path, index=False, encoding="utf-8")
print("CSV Saved: {}".format(file_path))