scrape tweet analytics and user avatar data

This commit is contained in:
Jarrian
2023-09-09 13:34:46 +08:00
parent ec1309885a
commit 8843b4d81e
2 changed files with 25 additions and 4 deletions

View File

@@ -1,6 +1,7 @@
from selenium.webdriver import Chrome
from selenium.common.exceptions import NoSuchElementException
class Tweet:
def __init__(self, card: Chrome) -> None:
self.card = card
@@ -73,6 +74,22 @@ class Tweet:
except NoSuchElementException:
self.like_cnt = '0'
try:
self.analytics_cnt = card.find_element(
'xpath',
'.//a[contains(@href, "/analytics")]//span'
).text
except NoSuchElementException:
self.analytics_cnt = '0'
try:
self.profile_img = card.find_element(
'xpath',
'.//div[@data-testid="Tweet-User-Avatar"]//img'
).get_attribute('src')
except NoSuchElementException:
self.profile_img = ''
self.tweet = (
self.user,
self.handle,
@@ -81,7 +98,9 @@ class Tweet:
self.content,
self.reply_cnt,
self.retweet_cnt,
self.like_cnt
self.like_cnt,
self.analytics_cnt,
self.profile_img
)
pass

View File

@@ -117,7 +117,7 @@ It may be due to the following:
- Twitter is experiencing unusual activity
""")
self.driver.quit()
exit()
sys.exit(1)
else:
print("Re-attempting to input username...")
@@ -166,7 +166,7 @@ It may be due to the following:
- Twitter is experiencing unusual activity
""")
self.driver.quit()
exit()
sys.exit(1)
else:
print("Re-attempting to input password...")
@@ -263,7 +263,9 @@ It may be due to the following:
'Content': [tweet[4] for tweet in self.data],
'Comments': [tweet[5] for tweet in self.data],
'Retweets': [tweet[6] for tweet in self.data],
'Likes': [tweet[7] for tweet in self.data]
'Likes': [tweet[7] for tweet in self.data],
'Analytics': [tweet[8] for tweet in self.data],
'Profile Image': [tweet[9] for tweet in self.data],
}
df = pd.DataFrame(data)