Files
selenium-twitter-scraper/scraper/tweet.py

149 lines
4.2 KiB
Python

from selenium.webdriver import Chrome
from selenium.common.exceptions import NoSuchElementException
class Tweet:
    """Fields scraped from a single tweet card element.

    The constructor runs a series of XPath lookups against ``card`` and stores
    each result as an instance attribute. Parsing stops early when a required
    element is missing, so an instance may be only partially populated:

    * no user-name span: only ``card`` is set;
    * no handle span: ``card`` and ``user`` are set;
    * no ``<time>`` element: ``card``, ``user`` and ``handle`` are set and
      ``is_ad`` is ``True``.

    Attributes that come after the failing lookup are left undefined in those
    cases. On a complete parse ``is_ad`` is ``False`` and ``tweet`` holds every
    extracted field as a tuple.
    """

    def __init__(self, card: Chrome) -> None:
        """Extract the tweet fields from ``card``.

        Args:
            card: Object queried through ``find_element`` / ``find_elements``.
                NOTE(review): annotated as ``Chrome``, but only the
                element-lookup methods are used; presumably this is the
                tweet's container element -- confirm against the caller.
        """
        self.card = card

        # Display name; without it the card is not parsed any further.
        try:
            self.user = card.find_element(
                "xpath", './/div[@data-testid="User-Name"]//span'
            ).text
        except NoSuchElementException:
            return

        # First span whose text contains "@".
        try:
            self.handle = card.find_element(
                "xpath", './/span[contains(text(), "@")]'
            ).text
        except NoSuchElementException:
            return

        # A card with no <time> element is flagged as an ad and not parsed
        # any further.
        try:
            time_element = card.find_element("xpath", ".//time")
        except NoSuchElementException:
            self.is_ad = True
            return
        self.date_time = time_element.get_attribute("datetime")
        # Set unconditionally once <time> exists, so ``is_ad`` is always
        # defined on a fully parsed tweet even when the element carries no
        # ``datetime`` attribute (``date_time`` is then None).
        self.is_ad = False

        try:
            card.find_element(
                "xpath", './/*[local-name()="svg" and @data-testid="icon-verified"]'
            )
        except NoSuchElementException:
            self.verified = False
        else:
            self.verified = True

        # Tweet text is the concatenation of the direct <span> and <a>
        # children of the first tweetText container.
        self.content = "".join(
            self._texts(
                card,
                '(.//div[@data-testid="tweetText"])[1]/span | (.//div[@data-testid="tweetText"])[1]/a',
            )
        )

        # Counters are kept as the displayed strings; "0" stands in for a
        # missing or empty counter.
        self.reply_cnt = self._count_text(
            card, './/div[@data-testid="reply"]//span'
        )
        self.retweet_cnt = self._count_text(
            card, './/div[@data-testid="retweet"]//span'
        )
        self.like_cnt = self._count_text(
            card, './/div[@data-testid="like"]//span'
        )
        self.analytics_cnt = self._count_text(
            card, './/a[contains(@href, "/analytics")]//span'
        )

        # find_elements yields an empty list when nothing matches, so these
        # lookups need no NoSuchElementException handling.
        self.tags = self._texts(
            card,
            './/a[contains(@href, "src=hashtag_click")]',
        )
        self.mentions = self._texts(
            card,
            '(.//div[@data-testid="tweetText"])[1]//a[contains(text(), "@")]',
        )

        emoji_images = card.find_elements(
            "xpath",
            '(.//div[@data-testid="tweetText"])[1]/img[contains(@src, "emoji")]',
        )
        emoji_alts = (image.get_attribute("alt") for image in emoji_images)
        # Emojis are stored as ASCII unicode-escape sequences. Images without
        # an ``alt`` attribute are skipped instead of failing on None.
        self.emojis = [
            alt.encode("unicode-escape").decode("ASCII")
            for alt in emoji_alts
            if alt is not None
        ]

        try:
            self.profile_img = card.find_element(
                "xpath", './/div[@data-testid="Tweet-User-Avatar"]//img'
            ).get_attribute("src")
        except NoSuchElementException:
            self.profile_img = ""

        # Flat record of every extracted field, in a fixed order.
        self.tweet = (
            self.user,
            self.handle,
            self.date_time,
            self.verified,
            self.content,
            self.reply_cnt,
            self.retweet_cnt,
            self.like_cnt,
            self.analytics_cnt,
            self.tags,
            self.mentions,
            self.emojis,
            self.profile_img,
        )

    @staticmethod
    def _count_text(card, xpath: str) -> str:
        """Return the text of the first match for ``xpath``, or "0" if missing or empty."""
        try:
            text = card.find_element("xpath", xpath).text
        except NoSuchElementException:
            return "0"
        return text or "0"

    @staticmethod
    def _texts(card, xpath: str) -> list:
        """Return the ``text`` of every element matching ``xpath`` (possibly empty)."""
        return [element.text for element in card.find_elements("xpath", xpath)]