From ed0be321bbb2e3d0e6e9473bdc7f65a81666fac6 Mon Sep 17 00:00:00 2001 From: Jarrian Date: Mon, 25 Sep 2023 00:04:36 +0800 Subject: [PATCH] remove commented codes --- main.ipynb | 114 ++++++----------------------------------------------- 1 file changed, 11 insertions(+), 103 deletions(-) diff --git a/main.ipynb b/main.ipynb index 9a3e5d0..3d864ce 100644 --- a/main.ipynb +++ b/main.ipynb @@ -17,7 +17,7 @@ }, { "cell_type": "code", - "execution_count": 94, + "execution_count": 113, "metadata": {}, "outputs": [], "source": [ @@ -57,7 +57,7 @@ }, { "cell_type": "code", - "execution_count": 95, + "execution_count": 114, "metadata": {}, "outputs": [], "source": [ @@ -97,7 +97,7 @@ }, { "cell_type": "code", - "execution_count": 96, + "execution_count": 115, "metadata": {}, "outputs": [], "source": [ @@ -141,7 +141,7 @@ }, { "cell_type": "code", - "execution_count": 97, + "execution_count": 116, "metadata": {}, "outputs": [], "source": [ @@ -272,7 +272,6 @@ " '(.//div[@data-testid=\"tweetText\"])[1]/img[contains(@src, \"emoji\")]',\n", " )\n", " \n", - " # self.emojis = [emoji.get_attribute(\"alt\").encode(\"utf-8\") for emoji in raw_emojis]\n", " self.emojis = [emoji.get_attribute(\"alt\").encode(\"unicode-escape\").decode(\"ASCII\") for emoji in raw_emojis]\n", " except NoSuchElementException:\n", " self.emojis = []\n", @@ -388,7 +387,7 @@ }, { "cell_type": "code", - "execution_count": 98, + "execution_count": 117, "metadata": {}, "outputs": [], "source": [ @@ -761,7 +760,6 @@ " refresh_count = 0\n", " added_tweets = 0\n", " empty_count = 0\n", - " # stale_count = 0\n", "\n", " while self.scroller.scrolling:\n", " try:\n", @@ -772,15 +770,6 @@ " try:\n", " tweet_id = str(card)\n", "\n", - " # def hide_card(el):\n", - " # self.driver.execute_script(\n", - " # \"arguments[0].setAttribute('disabled', true);\", el\n", - " # )\n", - "\n", - " # self.driver.execute_script(\n", - " # \"arguments[0].parentElement.parentElement.parentElement.style.display='none';\", el\n", - " # )\n", - "\n", " if tweet_id not in self.tweet_ids:\n", " self.tweet_ids.add(tweet_id)\n", "\n", @@ -799,14 +788,6 @@ " )\n", "\n", " if tweet:\n", - " # try:\n", - " # tweet_sig = f\"{tweet.user}|{tweet.handle}|{tweet.date_time}|{tweet.is_ad}\"\n", - " # except Exception as e:\n", - " # continue\n", - "\n", - " # if tweet_sig not in self.tweet_ids:\n", - " # self.tweet_ids.add(tweet_sig)\n", - "\n", " if not tweet.error and tweet.tweet is not None:\n", " if not tweet.is_ad:\n", " self.data.append(tweet.tweet)\n", @@ -820,102 +801,29 @@ " continue\n", " else:\n", " continue\n", - " # else:\n", - " # continue\n", " else:\n", " continue\n", " else:\n", " continue\n", - " # hide_card(card)\n", " except NoSuchElementException:\n", " continue\n", "\n", " if len(self.data) >= self.max_tweets:\n", " break\n", "\n", - " # self.remove_hidden_cards()\n", - "\n", - " # if added_tweets == 0:\n", - " # refresh_count += 1\n", - "\n", - " # if len(self.tweet_cards) > 0:\n", - " # self.driver.execute_script(\n", - " # \"arguments[0].scrollIntoView();\", self.tweet_cards[-1]\n", - " # )\n", - " # sleep(2)\n", - "\n", - " # sleep(1)\n", - "\n", - " # if refresh_count >= 10:\n", - " # print()\n", - " # print(\"No more tweets to scrape\")\n", - " # break\n", - " # else:\n", - " # refresh_count = 0\n", - "\n", - " # if len(self.tweet_cards) == 0:\n", - " # empty_count += 1\n", - " # sleep(1)\n", - "\n", - " # if empty_count >= 3:\n", - " # router()\n", - " # sleep(2)\n", - " # break\n", - " # else:\n", - " # empty_count = 0\n", - "\n", - " # if added_tweets == 0:\n", - " # refresh_count += 1\n", - " # sleep(1)\n", - " # if refresh_count >= 10:\n", - " # print()\n", - " # print(\"No more tweets to scrape\")\n", - " # break\n", - " # else:\n", - " # refresh_count = 0\n", - "\n", " if added_tweets == 0:\n", " if empty_count >= 5:\n", " if refresh_count >= 3:\n", " print()\n", " print(\"No more tweets to scrape\")\n", " break\n", - " # router()\n", - " # sleep(2)\n", " refresh_count += 1\n", " empty_count += 1\n", " sleep(1)\n", " else:\n", " empty_count = 0\n", " refresh_count = 0\n", - "\n", - " # self.scroller.scroll_count = 0\n", - "\n", - " # while True:\n", - " # self.scroller.scroll_to_bottom()\n", - " # sleep(2)\n", - " # self.scroller.update_scroll_position()\n", - "\n", - " # if self.scroller.last_position == self.scroller.current_position:\n", - " # # self.scroller.scroll_count += 1\n", - "\n", - " # # if self.scroller.scroll_count >= 3:\n", - " # # router()\n", - " # # sleep(2)\n", - " # # break\n", - " # # else:\n", - " # # sleep(1)\n", - " # sleep(2)\n", - " # else:\n", - " # self.scroller.last_position = self.scroller.current_position\n", - " # break\n", " except StaleElementReferenceException:\n", - " # stale_count += 1\n", - "\n", - " # if stale_count >= 3:\n", - " # router()\n", - " # stale_count = 0\n", - "\n", " sleep(2)\n", " continue\n", " except KeyboardInterrupt:\n", @@ -991,7 +899,7 @@ }, { "cell_type": "code", - "execution_count": 101, + "execution_count": 118, "metadata": {}, "outputs": [ { @@ -1024,7 +932,7 @@ }, { "cell_type": "code", - "execution_count": 102, + "execution_count": 119, "metadata": {}, "outputs": [ { @@ -1053,7 +961,7 @@ }, { "cell_type": "code", - "execution_count": 103, + "execution_count": 120, "metadata": {}, "outputs": [ { @@ -1090,7 +998,7 @@ }, { "cell_type": "code", - "execution_count": 104, + "execution_count": 121, "metadata": {}, "outputs": [ { @@ -1098,7 +1006,7 @@ "output_type": "stream", "text": [ "Saving Tweets to CSV...\n", - "CSV Saved: ./tweets/2023-09-24_23-47-18_tweets_1-50.csv\n" + "CSV Saved: ./tweets/2023-09-24_23-57-11_tweets_1-50.csv\n" ] } ], @@ -1108,7 +1016,7 @@ }, { "cell_type": "code", - "execution_count": 105, + "execution_count": 122, "metadata": {}, "outputs": [], "source": [