handle StaleElementReferenceException error

This commit is contained in:
Jarrian
2023-09-09 00:16:01 +08:00
parent 98547ff8e2
commit 4fcfb6d3d7

View File

@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 52,
"execution_count": 61,
"metadata": {},
"outputs": [],
"source": [
@@ -15,7 +15,7 @@
"from time import sleep\n",
"from selenium import webdriver\n",
"from selenium.webdriver.common.keys import Keys\n",
"from selenium.common.exceptions import NoSuchElementException, WebDriverException\n",
"from selenium.common.exceptions import NoSuchElementException, WebDriverException, StaleElementReferenceException\n",
"\n",
"from selenium.webdriver.chrome.options import Options as ChromeOptions\n",
"from selenium.webdriver.chrome.service import Service as ChromeService\n",
@@ -29,7 +29,7 @@
},
{
"cell_type": "code",
"execution_count": 53,
"execution_count": 62,
"metadata": {},
"outputs": [],
"source": [
@@ -54,7 +54,7 @@
},
{
"cell_type": "code",
"execution_count": 54,
"execution_count": 63,
"metadata": {},
"outputs": [],
"source": [
@@ -76,7 +76,7 @@
},
{
"cell_type": "code",
"execution_count": 55,
"execution_count": 64,
"metadata": {},
"outputs": [],
"source": [
@@ -186,7 +186,7 @@
},
{
"cell_type": "code",
"execution_count": 56,
"execution_count": 65,
"metadata": {},
"outputs": [],
"source": [
@@ -279,17 +279,30 @@
},
{
"cell_type": "code",
"execution_count": 57,
"execution_count": 66,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Progress: [[========================================]] 100.00% 500 of 500\n",
"\n",
"Scraping Complete\n",
"Tweets: 500\n"
"Progress: [[======================================--]] 95.20% 476 of 500"
]
},
{
"ename": "StaleElementReferenceException",
"evalue": "Message: stale element reference: stale element not found\n (Session info: headless chrome=116.0.5845.142); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#stale-element-reference-exception\nStacktrace:\n\tGetHandleVerifier [0x00DC37C3+48947]\n\t(No symbol) [0x00D58551]\n\t(No symbol) [0x00C5C92D]\n\t(No symbol) [0x00C5FD62]\n\t(No symbol) [0x00C6106A]\n\t(No symbol) [0x00C61110]\n\t(No symbol) [0x00C89B21]\n\t(No symbol) [0x00C89EFB]\n\t(No symbol) [0x00C82F61]\n\t(No symbol) [0x00CA50D4]\n\t(No symbol) [0x00C82EB6]\n\t(No symbol) [0x00CA53E4]\n\t(No symbol) [0x00CB75DA]\n\t(No symbol) [0x00CA4E86]\n\t(No symbol) [0x00C816C7]\n\t(No symbol) [0x00C8284D]\n\tGetHandleVerifier [0x0100FDF9+2458985]\n\tGetHandleVerifier [0x0105744F+2751423]\n\tGetHandleVerifier [0x01051361+2726609]\n\tGetHandleVerifier [0x00E40680+560624]\n\t(No symbol) [0x00D6238C]\n\t(No symbol) [0x00D5E268]\n\t(No symbol) [0x00D5E392]\n\t(No symbol) [0x00D510B7]\n\tBaseThreadInitThunk [0x77517D59+25]\n\tRtlInitializeExceptionChain [0x77D6B79B+107]\n\tRtlClearBits [0x77D6B71F+191]\n",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mStaleElementReferenceException\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[66], line 18\u001b[0m\n\u001b[0;32m 16\u001b[0m \u001b[39mif\u001b[39;00m tweet_id \u001b[39mnot\u001b[39;00m \u001b[39min\u001b[39;00m scraper\u001b[39m.\u001b[39mtweet_ids:\n\u001b[0;32m 17\u001b[0m scraper\u001b[39m.\u001b[39mtweet_ids\u001b[39m.\u001b[39madd(tweet_id)\n\u001b[1;32m---> 18\u001b[0m tweet \u001b[39m=\u001b[39m Tweet(card)\n\u001b[0;32m 19\u001b[0m \u001b[39mif\u001b[39;00m tweet:\n\u001b[0;32m 20\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m tweet\u001b[39m.\u001b[39mis_ad:\n",
"Cell \u001b[1;32mIn[65], line 5\u001b[0m, in \u001b[0;36mTweet.__init__\u001b[1;34m(self, card)\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__init__\u001b[39m(\u001b[39mself\u001b[39m, card) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 3\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcard \u001b[39m=\u001b[39m card\n\u001b[1;32m----> 5\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39muser \u001b[39m=\u001b[39m card\u001b[39m.\u001b[39;49mfind_element(\n\u001b[0;32m 6\u001b[0m \u001b[39m'\u001b[39;49m\u001b[39mxpath\u001b[39;49m\u001b[39m'\u001b[39;49m,\n\u001b[0;32m 7\u001b[0m \u001b[39m'\u001b[39;49m\u001b[39m.//div[@data-testid=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mUser-Name\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39m]//span\u001b[39;49m\u001b[39m'\u001b[39;49m\n\u001b[0;32m 8\u001b[0m )\u001b[39m.\u001b[39mtext\n\u001b[0;32m 10\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 11\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandle \u001b[39m=\u001b[39m card\u001b[39m.\u001b[39mfind_element(\n\u001b[0;32m 12\u001b[0m \u001b[39m'\u001b[39m\u001b[39mxpath\u001b[39m\u001b[39m'\u001b[39m,\n\u001b[0;32m 13\u001b[0m \u001b[39m'\u001b[39m\u001b[39m.//span[contains(text(), \u001b[39m\u001b[39m\"\u001b[39m\u001b[39m@\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m)]\u001b[39m\u001b[39m'\u001b[39m\n\u001b[0;32m 14\u001b[0m )\u001b[39m.\u001b[39mtext\n",
"File \u001b[1;32mc:\\Users\\glori\\anaconda3\\envs\\ml\\lib\\site-packages\\selenium\\webdriver\\remote\\webelement.py:416\u001b[0m, in \u001b[0;36mWebElement.find_element\u001b[1;34m(self, by, value)\u001b[0m\n\u001b[0;32m 413\u001b[0m by \u001b[39m=\u001b[39m By\u001b[39m.\u001b[39mCSS_SELECTOR\n\u001b[0;32m 414\u001b[0m value \u001b[39m=\u001b[39m \u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39m[name=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00mvalue\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m\u001b[39m]\u001b[39m\u001b[39m'\u001b[39m\n\u001b[1;32m--> 416\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_execute(Command\u001b[39m.\u001b[39;49mFIND_CHILD_ELEMENT, {\u001b[39m\"\u001b[39;49m\u001b[39musing\u001b[39;49m\u001b[39m\"\u001b[39;49m: by, \u001b[39m\"\u001b[39;49m\u001b[39mvalue\u001b[39;49m\u001b[39m\"\u001b[39;49m: value})[\u001b[39m\"\u001b[39m\u001b[39mvalue\u001b[39m\u001b[39m\"\u001b[39m]\n",
"File \u001b[1;32mc:\\Users\\glori\\anaconda3\\envs\\ml\\lib\\site-packages\\selenium\\webdriver\\remote\\webelement.py:394\u001b[0m, in \u001b[0;36mWebElement._execute\u001b[1;34m(self, command, params)\u001b[0m\n\u001b[0;32m 392\u001b[0m params \u001b[39m=\u001b[39m {}\n\u001b[0;32m 393\u001b[0m params[\u001b[39m\"\u001b[39m\u001b[39mid\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_id\n\u001b[1;32m--> 394\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_parent\u001b[39m.\u001b[39;49mexecute(command, params)\n",
"File \u001b[1;32mc:\\Users\\glori\\anaconda3\\envs\\ml\\lib\\site-packages\\selenium\\webdriver\\remote\\webdriver.py:344\u001b[0m, in \u001b[0;36mWebDriver.execute\u001b[1;34m(self, driver_command, params)\u001b[0m\n\u001b[0;32m 342\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcommand_executor\u001b[39m.\u001b[39mexecute(driver_command, params)\n\u001b[0;32m 343\u001b[0m \u001b[39mif\u001b[39;00m response:\n\u001b[1;32m--> 344\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49merror_handler\u001b[39m.\u001b[39;49mcheck_response(response)\n\u001b[0;32m 345\u001b[0m response[\u001b[39m\"\u001b[39m\u001b[39mvalue\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_unwrap_value(response\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mvalue\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mNone\u001b[39;00m))\n\u001b[0;32m 346\u001b[0m \u001b[39mreturn\u001b[39;00m response\n",
"File \u001b[1;32mc:\\Users\\glori\\anaconda3\\envs\\ml\\lib\\site-packages\\selenium\\webdriver\\remote\\errorhandler.py:229\u001b[0m, in \u001b[0;36mErrorHandler.check_response\u001b[1;34m(self, response)\u001b[0m\n\u001b[0;32m 227\u001b[0m alert_text \u001b[39m=\u001b[39m value[\u001b[39m\"\u001b[39m\u001b[39malert\u001b[39m\u001b[39m\"\u001b[39m]\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mtext\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m 228\u001b[0m \u001b[39mraise\u001b[39;00m exception_class(message, screen, stacktrace, alert_text) \u001b[39m# type: ignore[call-arg] # mypy is not smart enough here\u001b[39;00m\n\u001b[1;32m--> 229\u001b[0m \u001b[39mraise\u001b[39;00m exception_class(message, screen, stacktrace)\n",
"\u001b[1;31mStaleElementReferenceException\u001b[0m: Message: stale element reference: stale element not found\n (Session info: headless chrome=116.0.5845.142); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#stale-element-reference-exception\nStacktrace:\n\tGetHandleVerifier [0x00DC37C3+48947]\n\t(No symbol) [0x00D58551]\n\t(No symbol) [0x00C5C92D]\n\t(No symbol) [0x00C5FD62]\n\t(No symbol) [0x00C6106A]\n\t(No symbol) [0x00C61110]\n\t(No symbol) [0x00C89B21]\n\t(No symbol) [0x00C89EFB]\n\t(No symbol) [0x00C82F61]\n\t(No symbol) [0x00CA50D4]\n\t(No symbol) [0x00C82EB6]\n\t(No symbol) [0x00CA53E4]\n\t(No symbol) [0x00CB75DA]\n\t(No symbol) [0x00CA4E86]\n\t(No symbol) [0x00C816C7]\n\t(No symbol) [0x00C8284D]\n\tGetHandleVerifier [0x0100FDF9+2458985]\n\tGetHandleVerifier [0x0105744F+2751423]\n\tGetHandleVerifier [0x01051361+2726609]\n\tGetHandleVerifier [0x00E40680+560624]\n\t(No symbol) [0x00D6238C]\n\t(No symbol) [0x00D5E268]\n\t(No symbol) [0x00D5E392]\n\t(No symbol) [0x00D510B7]\n\tBaseThreadInitThunk [0x77517D59+25]\n\tRtlInitializeExceptionChain [0x77D6B79B+107]\n\tRtlClearBits [0x77D6B71F+191]\n"
]
}
],
@@ -304,62 +317,66 @@
"progress = Progress(0, scraper.max_tweets)\n",
"progress.print_progress(0)\n",
"\n",
"while scraper.scroller.scrolling:\n",
" scraper.get_tweets()\n",
" \n",
" for card in scraper.tweet_cards[-15:]:\n",
" tweet_id = str(card)\n",
" if tweet_id not in scraper.tweet_ids:\n",
" scraper.tweet_ids.add(tweet_id)\n",
" tweet = Tweet(card)\n",
" if tweet:\n",
" if not tweet.is_ad:\n",
" scraper.data.append(tweet.tweet)\n",
" progress.print_progress(len(scraper.data))\n",
"try:\n",
" while scraper.scroller.scrolling:\n",
" scraper.get_tweets()\n",
"\n",
" if len(scraper.data) >= scraper.max_tweets:\n",
" scraper.scroller.scrolling = False\n",
" break\n",
" \n",
" if len(scraper.data) % 50 == 0:\n",
" sleep(2)\n",
" for card in scraper.tweet_cards[-15:]:\n",
" tweet_id = str(card)\n",
" if tweet_id not in scraper.tweet_ids:\n",
" scraper.tweet_ids.add(tweet_id)\n",
" tweet = Tweet(card)\n",
" if tweet:\n",
" if not tweet.is_ad:\n",
" scraper.data.append(tweet.tweet)\n",
" progress.print_progress(len(scraper.data))\n",
"\n",
" if len(scraper.data) >= scraper.max_tweets:\n",
" break\n",
" \n",
" scraper.scroller.scroll_count = 0\n",
" \n",
" while True:\n",
" scraper.driver.execute_script(\n",
" 'window.scrollTo(0, document.body.scrollHeight);'\n",
" )\n",
" sleep(2)\n",
" scraper.scroller.current_position = scraper.driver.execute_script(\n",
" \"return window.pageYOffset;\"\n",
" )\n",
" \n",
" if scraper.scroller.last_position == scraper.scroller.current_position:\n",
" scraper.scroller.scroll_count += 1\n",
" \n",
" if scraper.scroller.scroll_count >= 3:\n",
" scraper.go_to_home()\n",
" sleep(2)\n",
" scraper.scroller.reset()\n",
" break\n",
" else:\n",
" sleep(2)\n",
" else:\n",
" scraper.scroller.last_position = scraper.scroller.current_position\n",
" if len(scraper.data) >= scraper.max_tweets:\n",
" scraper.scroller.scrolling = False\n",
" break\n",
"\n",
" if len(scraper.data) % 50 == 0:\n",
" sleep(2)\n",
"\n",
" if len(scraper.data) >= scraper.max_tweets:\n",
" break\n",
"\n",
" scraper.scroller.scroll_count = 0\n",
"\n",
" while True:\n",
" scraper.driver.execute_script(\n",
" 'window.scrollTo(0, document.body.scrollHeight);'\n",
" )\n",
" sleep(2)\n",
" scraper.scroller.current_position = scraper.driver.execute_script(\n",
" \"return window.pageYOffset;\"\n",
" )\n",
"\n",
" if scraper.scroller.last_position == scraper.scroller.current_position:\n",
" scraper.scroller.scroll_count += 1\n",
"\n",
" if scraper.scroller.scroll_count >= 3:\n",
" scraper.go_to_home()\n",
" sleep(2)\n",
" scraper.scroller.reset()\n",
" break\n",
" else:\n",
" sleep(2)\n",
" else:\n",
" scraper.scroller.last_position = scraper.scroller.current_position\n",
" break\n",
"\n",
" print(\"Scraping Complete\")\n",
"except StaleElementReferenceException:\n",
" print(\"Scraping Incomplete\")\n",
"\n",
"scraper.driver.close()\n",
"print(\"Scraping Complete\")\n",
"print(\"Tweets: {}\".format(len(scraper.data)))"
"print(\"Tweets: {} out of {}\".format(len(scraper.data), scraper.max_tweets))"
]
},
{
"cell_type": "code",
"execution_count": 58,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -385,7 +402,7 @@
},
{
"cell_type": "code",
"execution_count": 59,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -401,7 +418,7 @@
},
{
"cell_type": "code",
"execution_count": 60,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [