scrape tweet tags

This commit is contained in:
Jarrian
2023-09-09 15:34:33 +08:00
parent b43fb72dbd
commit efb0c65c39
2 changed files with 13 additions and 1 deletion

View File

@@ -81,6 +81,16 @@ class Tweet:
except NoSuchElementException:
self.profile_img = ""
try:
self.tags = card.find_elements(
"xpath",
'.//a[contains(@href, "src=hashtag_click")]',
)
self.tags = [tag.text for tag in self.tags]
except NoSuchElementException:
self.tags = []
self.tweet = (
self.user,
self.handle,
@@ -91,6 +101,7 @@ class Tweet:
self.retweet_cnt,
self.like_cnt,
self.analytics_cnt,
self.tags,
self.profile_img,
)

View File

@@ -270,7 +270,8 @@ It may be due to the following:
"Retweets": [tweet[6] for tweet in self.data],
"Likes": [tweet[7] for tweet in self.data],
"Analytics": [tweet[8] for tweet in self.data],
- "Profile Image": [tweet[9] for tweet in self.data],
+ "Tags": [tweet[9] for tweet in self.data],
+ "Profile Image": [tweet[10] for tweet in self.data],
}
df = pd.DataFrame(data)