from time import sleep

from selenium.common.exceptions import (
    NoSuchElementException,
    StaleElementReferenceException,
)
from selenium.webdriver.chrome.webdriver import WebDriver
from selenium.webdriver.common.action_chains import ActionChains


class Tweet:
    """Extract the fields of a single tweet card from the page.

    All scraping happens in the constructor. Afterwards:

    * ``error`` is ``True`` when a mandatory field (user, handle, timestamp)
      could not be located, or when scraping the poster's hover card failed.
      In that case ``tweet`` stays ``None`` and only part of the attributes
      may be set.
    * ``tweet`` is otherwise a tuple of
      ``(user, handle, date_time, verified, content, reply_cnt, retweet_cnt,
      like_cnt, analytics_cnt, tags, mentions, emojis, profile_img,
      following_cnt, followers_cnt)``.
    * ``is_ad`` is ``True`` when the card has no ``<time>`` element.

    Counters are kept as the strings shown on the page; a missing or empty
    counter is reported as ``"0"``.
    """

    # Total tries to locate the hover card before giving up.
    _HOVER_ATTEMPTS = 4
    # Total tries to locate a counter inside an already visible hover card.
    _HOVER_COUNT_ATTEMPTS = 20
    # Pause between two consecutive tries, in seconds.
    _RETRY_DELAY = 0.5

    def __init__(
        self,
        card: WebDriver,
        driver: WebDriver,
        actions: ActionChains,
        scrape_poster_details: bool = False,
    ) -> None:
        """Scrape ``card`` and populate the instance attributes.

        Args:
            card: Search context of the tweet card; only ``find_element`` /
                ``find_elements`` are called on it.
                NOTE(review): annotated as ``WebDriver`` but presumably a
                ``WebElement`` is passed by the caller -- confirm.
            driver: Driver used to locate the hover card at page level.
            actions: Action chain used to hover over the poster's name.
            scrape_poster_details: When true, hover over the poster's name
                and read the following / followers counters from the hover
                card.
        """
        self.card = card
        self.error = False
        self.tweet = None
        # Default so the attribute always exists, even when the <time>
        # element is present but carries no "datetime" attribute.
        self.is_ad = False

        try:
            self.user = card.find_element(
                "xpath", './/div[@data-testid="User-Name"]//span'
            ).text
        except NoSuchElementException:
            self.error = True
            self.user = "skip"

        try:
            self.handle = card.find_element(
                "xpath", './/span[contains(text(), "@")]'
            ).text
        except NoSuchElementException:
            self.error = True
            self.handle = "skip"

        try:
            self.date_time = card.find_element("xpath", ".//time").get_attribute(
                "datetime"
            )
        except NoSuchElementException:
            # A card without a timestamp is treated as an ad and skipped.
            self.is_ad = True
            self.error = True
            self.date_time = "skip"

        if self.error:
            return

        try:
            card.find_element(
                "xpath", './/*[local-name()="svg" and @data-testid="icon-verified"]'
            )
            self.verified = True
        except NoSuchElementException:
            self.verified = False

        # find_elements returns an empty list when nothing matches, so the
        # multi-element lookups below need no exception handling.
        text_parts = card.find_elements(
            "xpath",
            '(.//div[@data-testid="tweetText"])[1]/span | (.//div[@data-testid="tweetText"])[1]/a',
        )
        self.content = "".join(part.text for part in text_parts)

        self.reply_cnt = self._count_text(
            card, './/div[@data-testid="reply"]//span'
        )
        self.retweet_cnt = self._count_text(
            card, './/div[@data-testid="retweet"]//span'
        )
        self.like_cnt = self._count_text(card, './/div[@data-testid="like"]//span')
        self.analytics_cnt = self._count_text(
            card, './/a[contains(@href, "/analytics")]//span'
        )

        self.tags = [
            tag.text
            for tag in card.find_elements(
                "xpath",
                './/a[contains(@href, "src=hashtag_click")]',
            )
        ]

        self.mentions = [
            mention.text
            for mention in card.find_elements(
                "xpath",
                '(.//div[@data-testid="tweetText"])[1]//a[contains(text(), "@")]',
            )
        ]

        raw_emojis = card.find_elements(
            "xpath",
            '(.//div[@data-testid="tweetText"])[1]/img[contains(@src, "emoji")]',
        )
        self.emojis = []
        for emoji in raw_emojis:
            alt = emoji.get_attribute("alt")
            # get_attribute returns None for a missing attribute; skip those
            # images instead of crashing on None.encode.
            if alt is not None:
                self.emojis.append(alt.encode("unicode-escape").decode("ASCII"))

        try:
            self.profile_img = card.find_element(
                "xpath", './/div[@data-testid="Tweet-User-Avatar"]//img'
            ).get_attribute("src")
        except NoSuchElementException:
            self.profile_img = ""

        self.following_cnt = "0"
        self.followers_cnt = "0"

        if scrape_poster_details and not self._scrape_poster_details(
            card, driver, actions
        ):
            self.error = True
            return

        self.tweet = (
            self.user,
            self.handle,
            self.date_time,
            self.verified,
            self.content,
            self.reply_cnt,
            self.retweet_cnt,
            self.like_cnt,
            self.analytics_cnt,
            self.tags,
            self.mentions,
            self.emojis,
            self.profile_img,
            self.following_cnt,
            self.followers_cnt,
        )

    @staticmethod
    def _count_text(card, xpath: str) -> str:
        """Return the text of the element at ``xpath``, or "0" if missing/empty."""
        try:
            text = card.find_element("xpath", xpath).text
        except NoSuchElementException:
            return "0"
        return text if text != "" else "0"

    def _poll_hover_count(self, hover_card, xpath: str):
        """Poll the hover card for a counter; return its text or None on failure.

        The lookup is retried a bounded number of times with a short pause,
        so a hover card that never shows the counter cannot hang the scraper.
        """
        for _ in range(self._HOVER_COUNT_ATTEMPTS):
            try:
                text = hover_card.find_element("xpath", xpath).text
            except NoSuchElementException:
                sleep(self._RETRY_DELAY)
                continue
            except StaleElementReferenceException:
                return None
            return text if text != "" else "0"
        return None

    def _scrape_poster_details(self, card, driver, actions) -> bool:
        """Read following/followers counters from the poster's hover card.

        Returns True when both counters were read, False when the hover card
        or one of its counters could not be obtained.
        """
        el_name = card.find_element(
            "xpath", './/div[@data-testid="User-Name"]//span'
        )

        hover_card = None
        for attempt in range(self._HOVER_ATTEMPTS):
            try:
                actions.move_to_element(el_name).perform()
                hover_card = driver.find_element(
                    "xpath", '//div[@data-testid="hoverCardParent"]'
                )
                break
            except NoSuchElementException:
                # The hover card has not appeared yet; wait before retrying,
                # but do not sleep after the final attempt.
                if attempt < self._HOVER_ATTEMPTS - 1:
                    sleep(self._RETRY_DELAY)
            except StaleElementReferenceException:
                return False
        if hover_card is None:
            return False

        following = self._poll_hover_count(
            hover_card, './/a[contains(@href, "/following")]//span'
        )
        if following is None:
            return False
        self.following_cnt = following

        followers = self._poll_hover_count(
            hover_card,
            './/a[contains(@href, "/verified_followers")]//span',
        )
        if followers is None:
            return False
        self.followers_cnt = followers

        actions.reset_actions()
        return True