Remove commented-out code

This commit is contained in:
Jarrian
2023-09-25 00:04:36 +08:00
parent da77993c12
commit ed0be321bb

View File

@@ -17,7 +17,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 94, "execution_count": 113,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -57,7 +57,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 95, "execution_count": 114,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -97,7 +97,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 96, "execution_count": 115,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -141,7 +141,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 97, "execution_count": 116,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -272,7 +272,6 @@
" '(.//div[@data-testid=\"tweetText\"])[1]/img[contains(@src, \"emoji\")]',\n", " '(.//div[@data-testid=\"tweetText\"])[1]/img[contains(@src, \"emoji\")]',\n",
" )\n", " )\n",
" \n", " \n",
" # self.emojis = [emoji.get_attribute(\"alt\").encode(\"utf-8\") for emoji in raw_emojis]\n",
" self.emojis = [emoji.get_attribute(\"alt\").encode(\"unicode-escape\").decode(\"ASCII\") for emoji in raw_emojis]\n", " self.emojis = [emoji.get_attribute(\"alt\").encode(\"unicode-escape\").decode(\"ASCII\") for emoji in raw_emojis]\n",
" except NoSuchElementException:\n", " except NoSuchElementException:\n",
" self.emojis = []\n", " self.emojis = []\n",
@@ -388,7 +387,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 98, "execution_count": 117,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -761,7 +760,6 @@
" refresh_count = 0\n", " refresh_count = 0\n",
" added_tweets = 0\n", " added_tweets = 0\n",
" empty_count = 0\n", " empty_count = 0\n",
" # stale_count = 0\n",
"\n", "\n",
" while self.scroller.scrolling:\n", " while self.scroller.scrolling:\n",
" try:\n", " try:\n",
@@ -772,15 +770,6 @@
" try:\n", " try:\n",
" tweet_id = str(card)\n", " tweet_id = str(card)\n",
"\n", "\n",
" # def hide_card(el):\n",
" # self.driver.execute_script(\n",
" # \"arguments[0].setAttribute('disabled', true);\", el\n",
" # )\n",
"\n",
" # self.driver.execute_script(\n",
" # \"arguments[0].parentElement.parentElement.parentElement.style.display='none';\", el\n",
" # )\n",
"\n",
" if tweet_id not in self.tweet_ids:\n", " if tweet_id not in self.tweet_ids:\n",
" self.tweet_ids.add(tweet_id)\n", " self.tweet_ids.add(tweet_id)\n",
"\n", "\n",
@@ -799,14 +788,6 @@
" )\n", " )\n",
"\n", "\n",
" if tweet:\n", " if tweet:\n",
" # try:\n",
" # tweet_sig = f\"{tweet.user}|{tweet.handle}|{tweet.date_time}|{tweet.is_ad}\"\n",
" # except Exception as e:\n",
" # continue\n",
"\n",
" # if tweet_sig not in self.tweet_ids:\n",
" # self.tweet_ids.add(tweet_sig)\n",
"\n",
" if not tweet.error and tweet.tweet is not None:\n", " if not tweet.error and tweet.tweet is not None:\n",
" if not tweet.is_ad:\n", " if not tweet.is_ad:\n",
" self.data.append(tweet.tweet)\n", " self.data.append(tweet.tweet)\n",
@@ -820,102 +801,29 @@
" continue\n", " continue\n",
" else:\n", " else:\n",
" continue\n", " continue\n",
" # else:\n",
" # continue\n",
" else:\n", " else:\n",
" continue\n", " continue\n",
" else:\n", " else:\n",
" continue\n", " continue\n",
" # hide_card(card)\n",
" except NoSuchElementException:\n", " except NoSuchElementException:\n",
" continue\n", " continue\n",
"\n", "\n",
" if len(self.data) >= self.max_tweets:\n", " if len(self.data) >= self.max_tweets:\n",
" break\n", " break\n",
"\n", "\n",
" # self.remove_hidden_cards()\n",
"\n",
" # if added_tweets == 0:\n",
" # refresh_count += 1\n",
"\n",
" # if len(self.tweet_cards) > 0:\n",
" # self.driver.execute_script(\n",
" # \"arguments[0].scrollIntoView();\", self.tweet_cards[-1]\n",
" # )\n",
" # sleep(2)\n",
"\n",
" # sleep(1)\n",
"\n",
" # if refresh_count >= 10:\n",
" # print()\n",
" # print(\"No more tweets to scrape\")\n",
" # break\n",
" # else:\n",
" # refresh_count = 0\n",
"\n",
" # if len(self.tweet_cards) == 0:\n",
" # empty_count += 1\n",
" # sleep(1)\n",
"\n",
" # if empty_count >= 3:\n",
" # router()\n",
" # sleep(2)\n",
" # break\n",
" # else:\n",
" # empty_count = 0\n",
"\n",
" # if added_tweets == 0:\n",
" # refresh_count += 1\n",
" # sleep(1)\n",
" # if refresh_count >= 10:\n",
" # print()\n",
" # print(\"No more tweets to scrape\")\n",
" # break\n",
" # else:\n",
" # refresh_count = 0\n",
"\n",
" if added_tweets == 0:\n", " if added_tweets == 0:\n",
" if empty_count >= 5:\n", " if empty_count >= 5:\n",
" if refresh_count >= 3:\n", " if refresh_count >= 3:\n",
" print()\n", " print()\n",
" print(\"No more tweets to scrape\")\n", " print(\"No more tweets to scrape\")\n",
" break\n", " break\n",
" # router()\n",
" # sleep(2)\n",
" refresh_count += 1\n", " refresh_count += 1\n",
" empty_count += 1\n", " empty_count += 1\n",
" sleep(1)\n", " sleep(1)\n",
" else:\n", " else:\n",
" empty_count = 0\n", " empty_count = 0\n",
" refresh_count = 0\n", " refresh_count = 0\n",
"\n",
" # self.scroller.scroll_count = 0\n",
"\n",
" # while True:\n",
" # self.scroller.scroll_to_bottom()\n",
" # sleep(2)\n",
" # self.scroller.update_scroll_position()\n",
"\n",
" # if self.scroller.last_position == self.scroller.current_position:\n",
" # # self.scroller.scroll_count += 1\n",
"\n",
" # # if self.scroller.scroll_count >= 3:\n",
" # # router()\n",
" # # sleep(2)\n",
" # # break\n",
" # # else:\n",
" # # sleep(1)\n",
" # sleep(2)\n",
" # else:\n",
" # self.scroller.last_position = self.scroller.current_position\n",
" # break\n",
" except StaleElementReferenceException:\n", " except StaleElementReferenceException:\n",
" # stale_count += 1\n",
"\n",
" # if stale_count >= 3:\n",
" # router()\n",
" # stale_count = 0\n",
"\n",
" sleep(2)\n", " sleep(2)\n",
" continue\n", " continue\n",
" except KeyboardInterrupt:\n", " except KeyboardInterrupt:\n",
@@ -991,7 +899,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 101, "execution_count": 118,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@@ -1024,7 +932,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 102, "execution_count": 119,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@@ -1053,7 +961,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 103, "execution_count": 120,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@@ -1090,7 +998,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 104, "execution_count": 121,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@@ -1098,7 +1006,7 @@
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Saving Tweets to CSV...\n", "Saving Tweets to CSV...\n",
"CSV Saved: ./tweets/2023-09-24_23-47-18_tweets_1-50.csv\n" "CSV Saved: ./tweets/2023-09-24_23-57-11_tweets_1-50.csv\n"
] ]
} }
], ],
@@ -1108,7 +1016,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 105, "execution_count": 122,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [