scrape tweet tags

This commit is contained in:
Jarrian
2023-09-09 15:34:33 +08:00
parent b43fb72dbd
commit efb0c65c39
2 changed files with 13 additions and 1 deletion

View File

@@ -81,6 +81,16 @@ class Tweet:
except NoSuchElementException:
self.profile_img = ""
try:
self.tags = card.find_elements(
"xpath",
'.//a[contains(@href, "src=hashtag_click")]',
)
self.tags = [tag.text for tag in self.tags]
except NoSuchElementException:
self.tags = []
self.tweet = (
self.user,
self.handle,
@@ -91,6 +101,7 @@ class Tweet:
self.retweet_cnt,
self.like_cnt,
self.analytics_cnt,
self.tags,
self.profile_img,
)

View File

@@ -270,7 +270,8 @@ It may be due to the following:
"Retweets": [tweet[6] for tweet in self.data],
"Likes": [tweet[7] for tweet in self.data],
"Analytics": [tweet[8] for tweet in self.data],
-"Profile Image": [tweet[9] for tweet in self.data],
+"Tags": [tweet[9] for tweet in self.data],
+"Profile Image": [tweet[10] for tweet in self.data],
}
df = pd.DataFrame(data)