feat: scrape mentions and emojis
This commit is contained in:
@@ -77,13 +77,6 @@ class Tweet:
|
||||
except NoSuchElementException:
|
||||
self.analytics_cnt = "0"
|
||||
|
||||
try:
|
||||
self.profile_img = card.find_element(
|
||||
"xpath", './/div[@data-testid="Tweet-User-Avatar"]//img'
|
||||
).get_attribute("src")
|
||||
except NoSuchElementException:
|
||||
self.profile_img = ""
|
||||
|
||||
try:
|
||||
self.tags = card.find_elements(
|
||||
"xpath",
|
||||
@@ -94,6 +87,36 @@ class Tweet:
|
||||
except NoSuchElementException:
|
||||
self.tags = []
|
||||
|
||||
try:
|
||||
self.mentions = card.find_elements(
|
||||
"xpath",
|
||||
'(.//div[@data-testid="tweetText"])[1]//a[contains(text(), "@")]',
|
||||
)
|
||||
|
||||
self.mentions = [mention.text for mention in self.mentions]
|
||||
except NoSuchElementException:
|
||||
self.mentions = []
|
||||
|
||||
try:
|
||||
raw_emojis = card.find_elements(
|
||||
"xpath",
|
||||
'(.//div[@data-testid="tweetText"])[1]/img[contains(@src, "emoji")]',
|
||||
)
|
||||
|
||||
self.emojis = [
|
||||
emoji.get_attribute("alt").encode("unicode-escape").decode("ASCII")
|
||||
for emoji in raw_emojis
|
||||
]
|
||||
except NoSuchElementException:
|
||||
self.emojis = []
|
||||
|
||||
try:
|
||||
self.profile_img = card.find_element(
|
||||
"xpath", './/div[@data-testid="Tweet-User-Avatar"]//img'
|
||||
).get_attribute("src")
|
||||
except NoSuchElementException:
|
||||
self.profile_img = ""
|
||||
|
||||
self.tweet = (
|
||||
self.user,
|
||||
self.handle,
|
||||
@@ -105,6 +128,8 @@ class Tweet:
|
||||
self.like_cnt,
|
||||
self.analytics_cnt,
|
||||
self.tags,
|
||||
self.mentions,
|
||||
self.emojis,
|
||||
self.profile_img,
|
||||
)
|
||||
|
||||
|
||||
@@ -88,6 +88,7 @@ class Twitter_Scraper:
|
||||
"tab": "Latest" if scrape_latest else "Top" if scrape_top else "Latest",
|
||||
}
|
||||
self.router = self.go_to_home
|
||||
self.scroller = Scroller(self.driver)
|
||||
|
||||
if scrape_username is not None:
|
||||
self.scraper_details["type"] = "Username"
|
||||
@@ -462,14 +463,16 @@ It may be due to the following:
|
||||
"Likes": [tweet[7] for tweet in self.data],
|
||||
"Analytics": [tweet[8] for tweet in self.data],
|
||||
"Tags": [tweet[9] for tweet in self.data],
|
||||
"Profile Image": [tweet[10] for tweet in self.data],
|
||||
"Mentions": [tweet[10] for tweet in self.data],
|
||||
"Emojis": [tweet[11] for tweet in self.data],
|
||||
"Profile Image": [tweet[12] for tweet in self.data],
|
||||
}
|
||||
|
||||
df = pd.DataFrame(data)
|
||||
|
||||
current_time = now.strftime("%Y-%m-%d_%H-%M-%S")
|
||||
file_path = f"{folder_path}{current_time}_tweets_1-{len(self.data)}.csv"
|
||||
df.to_csv(file_path, index=False)
|
||||
df.to_csv(file_path, index=False, encoding="utf-8")
|
||||
|
||||
print("CSV Saved: {}".format(file_path))
|
||||
|
||||
|
||||
Reference in New Issue
Block a user