278 lines
8.7 KiB
Python
278 lines
8.7 KiB
Python
from time import sleep
|
|
from selenium.common.exceptions import (
|
|
NoSuchElementException,
|
|
StaleElementReferenceException,
|
|
)
|
|
from selenium.webdriver.chrome.webdriver import WebDriver
|
|
from selenium.webdriver.common.action_chains import ActionChains
|
|
|
|
|
|
class Tweet:
    """Data scraped from a single tweet card.

    The constructor does all the work: it queries ``card`` for each field
    and stores the results as attributes.  On success ``self.tweet`` holds
    an 18-item tuple (see the end of ``__init__`` for the field order).

    Attributes that are always set:
        card: the element passed in.
        error: True when a required field (user name, handle, timestamp)
            was missing, or when poster-detail scraping failed.
        tweet: the result tuple, or None when ``error`` is True.
        is_ad: True when the card has no ``<time>`` element.
        user, handle, date_time: the required fields; the string "skip"
            when the corresponding element was not found.

    All other attributes are only set when the required fields were found.
    """

    # Hover-card appearance: 4 tries with a 0.5 s pause between them.
    _HOVER_ATTEMPTS = 4
    _HOVER_RETRY_DELAY = 0.5
    # Each hover-card field is polled for at most ~5 s before giving up,
    # so a card that lacks the element can no longer hang the scraper.
    _FIELD_ATTEMPTS = 20
    _FIELD_RETRY_DELAY = 0.25

    def __init__(
        self,
        card: WebDriver,
        driver: WebDriver,
        actions: ActionChains,
        scrape_poster_details=False,
    ) -> None:
        """Scrape ``card`` and populate this object.

        Args:
            card: object exposing ``find_element`` / ``find_elements``;
                presumably the tweet's root element -- confirm with caller.
            driver: used to locate the hover card, which is searched for
                from the document root rather than inside ``card``.
            actions: action chain used to hover over the poster's name.
            scrape_poster_details: when True, also hover the poster's name
                and read user id, following and followers counts.
        """
        self.card = card
        self.error = False
        self.tweet = None
        # Defined up front so the attribute exists even when <time> is
        # present but carries no "datetime" attribute.
        self.is_ad = False

        # All three required lookups run even if an earlier one failed, so
        # user/handle/date_time are always assigned.
        self.user = self._required_text(
            card, './/div[@data-testid="User-Name"]//span'
        )
        self.handle = self._required_text(
            card, './/span[contains(text(), "@")]'
        )

        try:
            self.date_time = card.find_element("xpath", ".//time").get_attribute(
                "datetime"
            )
        except NoSuchElementException:
            # A card without a timestamp is treated as an ad.
            self.is_ad = True
            self.error = True
            self.date_time = "skip"

        if self.error:
            return

        try:
            card.find_element(
                "xpath", './/*[local-name()="svg" and @data-testid="icon-verified"]'
            )
        except NoSuchElementException:
            self.verified = False
        else:
            self.verified = True

        text_parts = card.find_elements(
            "xpath",
            '(.//div[@data-testid="tweetText"])[1]/span | (.//div[@data-testid="tweetText"])[1]/a',
        )
        self.content = "".join(part.text for part in text_parts)

        self.reply_cnt = self._count_text(
            card, './/button[@data-testid="reply"]//span'
        )
        self.retweet_cnt = self._count_text(
            card, './/button[@data-testid="retweet"]//span'
        )
        self.like_cnt = self._count_text(
            card, './/button[@data-testid="like"]//span'
        )
        self.analytics_cnt = self._count_text(
            card, './/a[contains(@href, "/analytics")]//span'
        )

        # find_elements yields an empty list when nothing matches, so these
        # lookups need no NoSuchElementException handling.
        self.tags = [
            tag.text
            for tag in card.find_elements(
                "xpath",
                './/a[contains(@href, "src=hashtag_click")]',
            )
        ]

        self.mentions = [
            mention.text
            for mention in card.find_elements(
                "xpath",
                '(.//div[@data-testid="tweetText"])[1]//a[contains(text(), "@")]',
            )
        ]

        raw_emojis = card.find_elements(
            "xpath",
            '(.//div[@data-testid="tweetText"])[1]/img[contains(@src, "emoji")]',
        )
        self.emojis = []
        for emoji in raw_emojis:
            alt = emoji.get_attribute("alt")
            # get_attribute may return None; skip images without alt text.
            if alt:
                self.emojis.append(alt.encode("unicode-escape").decode("ASCII"))

        try:
            self.profile_img = card.find_element(
                "xpath", './/div[@data-testid="Tweet-User-Avatar"]//img'
            ).get_attribute("src")
        except NoSuchElementException:
            self.profile_img = ""

        try:
            # "or ''" covers an anchor whose href attribute reads as None.
            self.tweet_link = (
                self.card.find_element(
                    "xpath",
                    ".//a[contains(@href, '/status/')]",
                ).get_attribute("href")
                or ""
            )
        except NoSuchElementException:
            self.tweet_link = ""
        # The id is the last path segment of the link ("" for an empty link).
        self.tweet_id = self.tweet_link.split("/")[-1]

        self.following_cnt = "0"
        self.followers_cnt = "0"
        self.user_id = None

        if scrape_poster_details and not self._scrape_poster_details(
            card, driver, actions
        ):
            return

        self.tweet = (
            self.user,
            self.handle,
            self.date_time,
            self.verified,
            self.content,
            self.reply_cnt,
            self.retweet_cnt,
            self.like_cnt,
            self.analytics_cnt,
            self.tags,
            self.mentions,
            self.emojis,
            self.profile_img,
            self.tweet_link,
            self.tweet_id,
            self.user_id,
            self.following_cnt,
            self.followers_cnt,
        )

    def _required_text(self, card, xpath):
        """Return the text at ``xpath``; on a miss flag an error and return "skip"."""
        try:
            return card.find_element("xpath", xpath).text
        except NoSuchElementException:
            self.error = True
            return "skip"

    @staticmethod
    def _count_text(node, xpath):
        """Return the counter text at ``xpath``, or "0" when missing or empty."""
        try:
            text = node.find_element("xpath", xpath).text
        except NoSuchElementException:
            return "0"
        return text or "0"

    def _poll_hover_card(self, hover_card, xpath, read):
        """Poll ``hover_card`` for ``xpath`` and return ``read(element)``.

        Returns None when the element has not appeared after
        ``_FIELD_ATTEMPTS`` tries.  StaleElementReferenceException is left
        to propagate to the caller.
        """
        for attempt in range(self._FIELD_ATTEMPTS):
            try:
                return read(hover_card.find_element("xpath", xpath))
            except NoSuchElementException:
                if attempt + 1 < self._FIELD_ATTEMPTS:
                    sleep(self._FIELD_RETRY_DELAY)
        return None

    def _scrape_poster_details(self, card, driver, actions):
        """Hover the poster's name and read user id / following / followers.

        Returns True on success.  Returns False after setting
        ``self.error`` when the hover card never appears or an element
        goes stale.  A field that never shows up inside the hover card
        keeps its default (``user_id`` None, counts "0") instead of
        blocking forever.
        """
        el_name = card.find_element(
            "xpath", './/div[@data-testid="User-Name"]//span'
        )

        hover_card = None
        for attempt in range(self._HOVER_ATTEMPTS):
            try:
                actions.move_to_element(el_name).perform()
                hover_card = driver.find_element(
                    "xpath", '//div[@data-testid="hoverCardParent"]'
                )
                break
            except NoSuchElementException:
                if attempt + 1 < self._HOVER_ATTEMPTS:
                    sleep(self._HOVER_RETRY_DELAY)
            except StaleElementReferenceException:
                self.error = True
                return False

        if hover_card is None:
            # The original statement here was a bare `self.error`, which
            # did nothing; the failure must actually be recorded.
            self.error = True
            return False

        try:
            raw_user_id = self._poll_hover_card(
                hover_card,
                '(.//div[contains(@data-testid, "-follow")]) | (.//div[contains(@data-testid, "-unfollow")])',
                lambda el: el.get_attribute("data-testid"),
            )
            if raw_user_id:
                # The id is the part of the data-testid before the first "-".
                self.user_id = str(raw_user_id.split("-")[0])

            following = self._poll_hover_card(
                hover_card,
                './/a[contains(@href, "/following")]//span',
                lambda el: el.text,
            )
            if following:
                self.following_cnt = following

            followers = self._poll_hover_card(
                hover_card,
                './/a[contains(@href, "/verified_followers")]//span',
                lambda el: el.text,
            )
            if followers:
                self.followers_cnt = followers
        except StaleElementReferenceException:
            self.error = True
            return False

        actions.reset_actions()
        return True
|