From feb7c3d9a4553bad7e3e3d07b3825870cce4832d Mon Sep 17 00:00:00 2001 From: Salastil Date: Thu, 20 Mar 2025 02:24:52 -0400 Subject: [PATCH] Exception handling over Unicode decoding --- scraper/tweet.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/scraper/tweet.py b/scraper/tweet.py index 147815f..65658de 100644 --- a/scraper/tweet.py +++ b/scraper/tweet.py @@ -23,7 +23,8 @@ class Tweet: self.user = card.find_element( "xpath", './/div[@data-testid="User-Name"]//span' ).text - except NoSuchElementException: + self.user = raw_user.encode("utf-8", "ignore").decode("utf-8") + except (NoSuchElementException, UnicodeEncodeError, UnicodeDecodeError): self.error = True self.user = "skip" @@ -66,7 +67,11 @@ class Tweet: ) for index, content in enumerate(contents): - self.content += content.text + try: + text = content.text.encode("utf-8", "ignore").decode("utf-8") + self.content += text + except (UnicodeEncodeError, UnicodeDecodeError): + continue try: self.reply_cnt = card.find_element(