Remove commented-out code

This commit is contained in:
Jarrian
2023-09-25 00:04:36 +08:00
parent da77993c12
commit ed0be321bb

View File

@@ -17,7 +17,7 @@
},
{
"cell_type": "code",
"execution_count": 94,
"execution_count": 113,
"metadata": {},
"outputs": [],
"source": [
@@ -57,7 +57,7 @@
},
{
"cell_type": "code",
"execution_count": 95,
"execution_count": 114,
"metadata": {},
"outputs": [],
"source": [
@@ -97,7 +97,7 @@
},
{
"cell_type": "code",
"execution_count": 96,
"execution_count": 115,
"metadata": {},
"outputs": [],
"source": [
@@ -141,7 +141,7 @@
},
{
"cell_type": "code",
"execution_count": 97,
"execution_count": 116,
"metadata": {},
"outputs": [],
"source": [
@@ -272,7 +272,6 @@
" '(.//div[@data-testid=\"tweetText\"])[1]/img[contains(@src, \"emoji\")]',\n",
" )\n",
" \n",
" # self.emojis = [emoji.get_attribute(\"alt\").encode(\"utf-8\") for emoji in raw_emojis]\n",
" self.emojis = [emoji.get_attribute(\"alt\").encode(\"unicode-escape\").decode(\"ASCII\") for emoji in raw_emojis]\n",
" except NoSuchElementException:\n",
" self.emojis = []\n",
@@ -388,7 +387,7 @@
},
{
"cell_type": "code",
"execution_count": 98,
"execution_count": 117,
"metadata": {},
"outputs": [],
"source": [
@@ -761,7 +760,6 @@
" refresh_count = 0\n",
" added_tweets = 0\n",
" empty_count = 0\n",
" # stale_count = 0\n",
"\n",
" while self.scroller.scrolling:\n",
" try:\n",
@@ -772,15 +770,6 @@
" try:\n",
" tweet_id = str(card)\n",
"\n",
" # def hide_card(el):\n",
" # self.driver.execute_script(\n",
" # \"arguments[0].setAttribute('disabled', true);\", el\n",
" # )\n",
"\n",
" # self.driver.execute_script(\n",
" # \"arguments[0].parentElement.parentElement.parentElement.style.display='none';\", el\n",
" # )\n",
"\n",
" if tweet_id not in self.tweet_ids:\n",
" self.tweet_ids.add(tweet_id)\n",
"\n",
@@ -799,14 +788,6 @@
" )\n",
"\n",
" if tweet:\n",
" # try:\n",
" # tweet_sig = f\"{tweet.user}|{tweet.handle}|{tweet.date_time}|{tweet.is_ad}\"\n",
" # except Exception as e:\n",
" # continue\n",
"\n",
" # if tweet_sig not in self.tweet_ids:\n",
" # self.tweet_ids.add(tweet_sig)\n",
"\n",
" if not tweet.error and tweet.tweet is not None:\n",
" if not tweet.is_ad:\n",
" self.data.append(tweet.tweet)\n",
@@ -820,102 +801,29 @@
" continue\n",
" else:\n",
" continue\n",
" # else:\n",
" # continue\n",
" else:\n",
" continue\n",
" else:\n",
" continue\n",
" # hide_card(card)\n",
" except NoSuchElementException:\n",
" continue\n",
"\n",
" if len(self.data) >= self.max_tweets:\n",
" break\n",
"\n",
" # self.remove_hidden_cards()\n",
"\n",
" # if added_tweets == 0:\n",
" # refresh_count += 1\n",
"\n",
" # if len(self.tweet_cards) > 0:\n",
" # self.driver.execute_script(\n",
" # \"arguments[0].scrollIntoView();\", self.tweet_cards[-1]\n",
" # )\n",
" # sleep(2)\n",
"\n",
" # sleep(1)\n",
"\n",
" # if refresh_count >= 10:\n",
" # print()\n",
" # print(\"No more tweets to scrape\")\n",
" # break\n",
" # else:\n",
" # refresh_count = 0\n",
"\n",
" # if len(self.tweet_cards) == 0:\n",
" # empty_count += 1\n",
" # sleep(1)\n",
"\n",
" # if empty_count >= 3:\n",
" # router()\n",
" # sleep(2)\n",
" # break\n",
" # else:\n",
" # empty_count = 0\n",
"\n",
" # if added_tweets == 0:\n",
" # refresh_count += 1\n",
" # sleep(1)\n",
" # if refresh_count >= 10:\n",
" # print()\n",
" # print(\"No more tweets to scrape\")\n",
" # break\n",
" # else:\n",
" # refresh_count = 0\n",
"\n",
" if added_tweets == 0:\n",
" if empty_count >= 5:\n",
" if refresh_count >= 3:\n",
" print()\n",
" print(\"No more tweets to scrape\")\n",
" break\n",
" # router()\n",
" # sleep(2)\n",
" refresh_count += 1\n",
" empty_count += 1\n",
" sleep(1)\n",
" else:\n",
" empty_count = 0\n",
" refresh_count = 0\n",
"\n",
" # self.scroller.scroll_count = 0\n",
"\n",
" # while True:\n",
" # self.scroller.scroll_to_bottom()\n",
" # sleep(2)\n",
" # self.scroller.update_scroll_position()\n",
"\n",
" # if self.scroller.last_position == self.scroller.current_position:\n",
" # # self.scroller.scroll_count += 1\n",
"\n",
" # # if self.scroller.scroll_count >= 3:\n",
" # # router()\n",
" # # sleep(2)\n",
" # # break\n",
" # # else:\n",
" # # sleep(1)\n",
" # sleep(2)\n",
" # else:\n",
" # self.scroller.last_position = self.scroller.current_position\n",
" # break\n",
" except StaleElementReferenceException:\n",
" # stale_count += 1\n",
"\n",
" # if stale_count >= 3:\n",
" # router()\n",
" # stale_count = 0\n",
"\n",
" sleep(2)\n",
" continue\n",
" except KeyboardInterrupt:\n",
@@ -991,7 +899,7 @@
},
{
"cell_type": "code",
"execution_count": 101,
"execution_count": 118,
"metadata": {},
"outputs": [
{
@@ -1024,7 +932,7 @@
},
{
"cell_type": "code",
"execution_count": 102,
"execution_count": 119,
"metadata": {},
"outputs": [
{
@@ -1053,7 +961,7 @@
},
{
"cell_type": "code",
"execution_count": 103,
"execution_count": 120,
"metadata": {},
"outputs": [
{
@@ -1090,7 +998,7 @@
},
{
"cell_type": "code",
"execution_count": 104,
"execution_count": 121,
"metadata": {},
"outputs": [
{
@@ -1098,7 +1006,7 @@
"output_type": "stream",
"text": [
"Saving Tweets to CSV...\n",
"CSV Saved: ./tweets/2023-09-24_23-47-18_tweets_1-50.csv\n"
"CSV Saved: ./tweets/2023-09-24_23-57-11_tweets_1-50.csv\n"
]
}
],
@@ -1108,7 +1016,7 @@
},
{
"cell_type": "code",
"execution_count": 105,
"execution_count": 122,
"metadata": {},
"outputs": [],
"source": [