handle StaleElementReferenceException error
This commit is contained in:
137
main.ipynb
137
main.ipynb
@@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 52,
|
||||
"execution_count": 61,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -15,7 +15,7 @@
|
||||
"from time import sleep\n",
|
||||
"from selenium import webdriver\n",
|
||||
"from selenium.webdriver.common.keys import Keys\n",
|
||||
"from selenium.common.exceptions import NoSuchElementException, WebDriverException\n",
|
||||
"from selenium.common.exceptions import NoSuchElementException, WebDriverException, StaleElementReferenceException\n",
|
||||
"\n",
|
||||
"from selenium.webdriver.chrome.options import Options as ChromeOptions\n",
|
||||
"from selenium.webdriver.chrome.service import Service as ChromeService\n",
|
||||
@@ -29,7 +29,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 53,
|
||||
"execution_count": 62,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -54,7 +54,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 54,
|
||||
"execution_count": 63,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -76,7 +76,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 55,
|
||||
"execution_count": 64,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -186,7 +186,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 56,
|
||||
"execution_count": 65,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -279,17 +279,30 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 57,
|
||||
"execution_count": 66,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Progress: [[========================================]] 100.00% 500 of 500\n",
|
||||
"\n",
|
||||
"Scraping Complete\n",
|
||||
"Tweets: 500\n"
|
||||
"Progress: [[======================================--]] 95.20% 476 of 500"
|
||||
]
|
||||
},
|
||||
{
|
||||
"ename": "StaleElementReferenceException",
|
||||
"evalue": "Message: stale element reference: stale element not found\n (Session info: headless chrome=116.0.5845.142); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#stale-element-reference-exception\nStacktrace:\n\tGetHandleVerifier [0x00DC37C3+48947]\n\t(No symbol) [0x00D58551]\n\t(No symbol) [0x00C5C92D]\n\t(No symbol) [0x00C5FD62]\n\t(No symbol) [0x00C6106A]\n\t(No symbol) [0x00C61110]\n\t(No symbol) [0x00C89B21]\n\t(No symbol) [0x00C89EFB]\n\t(No symbol) [0x00C82F61]\n\t(No symbol) [0x00CA50D4]\n\t(No symbol) [0x00C82EB6]\n\t(No symbol) [0x00CA53E4]\n\t(No symbol) [0x00CB75DA]\n\t(No symbol) [0x00CA4E86]\n\t(No symbol) [0x00C816C7]\n\t(No symbol) [0x00C8284D]\n\tGetHandleVerifier [0x0100FDF9+2458985]\n\tGetHandleVerifier [0x0105744F+2751423]\n\tGetHandleVerifier [0x01051361+2726609]\n\tGetHandleVerifier [0x00E40680+560624]\n\t(No symbol) [0x00D6238C]\n\t(No symbol) [0x00D5E268]\n\t(No symbol) [0x00D5E392]\n\t(No symbol) [0x00D510B7]\n\tBaseThreadInitThunk [0x77517D59+25]\n\tRtlInitializeExceptionChain [0x77D6B79B+107]\n\tRtlClearBits [0x77D6B71F+191]\n",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[1;31mStaleElementReferenceException\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[1;32mIn[66], line 18\u001b[0m\n\u001b[0;32m 16\u001b[0m \u001b[39mif\u001b[39;00m tweet_id \u001b[39mnot\u001b[39;00m \u001b[39min\u001b[39;00m scraper\u001b[39m.\u001b[39mtweet_ids:\n\u001b[0;32m 17\u001b[0m scraper\u001b[39m.\u001b[39mtweet_ids\u001b[39m.\u001b[39madd(tweet_id)\n\u001b[1;32m---> 18\u001b[0m tweet \u001b[39m=\u001b[39m Tweet(card)\n\u001b[0;32m 19\u001b[0m \u001b[39mif\u001b[39;00m tweet:\n\u001b[0;32m 20\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m tweet\u001b[39m.\u001b[39mis_ad:\n",
|
||||
"Cell \u001b[1;32mIn[65], line 5\u001b[0m, in \u001b[0;36mTweet.__init__\u001b[1;34m(self, card)\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__init__\u001b[39m(\u001b[39mself\u001b[39m, card) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 3\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcard \u001b[39m=\u001b[39m card\n\u001b[1;32m----> 5\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39muser \u001b[39m=\u001b[39m card\u001b[39m.\u001b[39;49mfind_element(\n\u001b[0;32m 6\u001b[0m \u001b[39m'\u001b[39;49m\u001b[39mxpath\u001b[39;49m\u001b[39m'\u001b[39;49m,\n\u001b[0;32m 7\u001b[0m \u001b[39m'\u001b[39;49m\u001b[39m.//div[@data-testid=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mUser-Name\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39m]//span\u001b[39;49m\u001b[39m'\u001b[39;49m\n\u001b[0;32m 8\u001b[0m )\u001b[39m.\u001b[39mtext\n\u001b[0;32m 10\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 11\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandle \u001b[39m=\u001b[39m card\u001b[39m.\u001b[39mfind_element(\n\u001b[0;32m 12\u001b[0m \u001b[39m'\u001b[39m\u001b[39mxpath\u001b[39m\u001b[39m'\u001b[39m,\n\u001b[0;32m 13\u001b[0m \u001b[39m'\u001b[39m\u001b[39m.//span[contains(text(), \u001b[39m\u001b[39m\"\u001b[39m\u001b[39m@\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m)]\u001b[39m\u001b[39m'\u001b[39m\n\u001b[0;32m 14\u001b[0m )\u001b[39m.\u001b[39mtext\n",
|
||||
"File \u001b[1;32mc:\\Users\\glori\\anaconda3\\envs\\ml\\lib\\site-packages\\selenium\\webdriver\\remote\\webelement.py:416\u001b[0m, in \u001b[0;36mWebElement.find_element\u001b[1;34m(self, by, value)\u001b[0m\n\u001b[0;32m 413\u001b[0m by \u001b[39m=\u001b[39m By\u001b[39m.\u001b[39mCSS_SELECTOR\n\u001b[0;32m 414\u001b[0m value \u001b[39m=\u001b[39m \u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39m[name=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00mvalue\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m\u001b[39m]\u001b[39m\u001b[39m'\u001b[39m\n\u001b[1;32m--> 416\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_execute(Command\u001b[39m.\u001b[39;49mFIND_CHILD_ELEMENT, {\u001b[39m\"\u001b[39;49m\u001b[39musing\u001b[39;49m\u001b[39m\"\u001b[39;49m: by, \u001b[39m\"\u001b[39;49m\u001b[39mvalue\u001b[39;49m\u001b[39m\"\u001b[39;49m: value})[\u001b[39m\"\u001b[39m\u001b[39mvalue\u001b[39m\u001b[39m\"\u001b[39m]\n",
|
||||
"File \u001b[1;32mc:\\Users\\glori\\anaconda3\\envs\\ml\\lib\\site-packages\\selenium\\webdriver\\remote\\webelement.py:394\u001b[0m, in \u001b[0;36mWebElement._execute\u001b[1;34m(self, command, params)\u001b[0m\n\u001b[0;32m 392\u001b[0m params \u001b[39m=\u001b[39m {}\n\u001b[0;32m 393\u001b[0m params[\u001b[39m\"\u001b[39m\u001b[39mid\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_id\n\u001b[1;32m--> 394\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_parent\u001b[39m.\u001b[39;49mexecute(command, params)\n",
|
||||
"File \u001b[1;32mc:\\Users\\glori\\anaconda3\\envs\\ml\\lib\\site-packages\\selenium\\webdriver\\remote\\webdriver.py:344\u001b[0m, in \u001b[0;36mWebDriver.execute\u001b[1;34m(self, driver_command, params)\u001b[0m\n\u001b[0;32m 342\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcommand_executor\u001b[39m.\u001b[39mexecute(driver_command, params)\n\u001b[0;32m 343\u001b[0m \u001b[39mif\u001b[39;00m response:\n\u001b[1;32m--> 344\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49merror_handler\u001b[39m.\u001b[39;49mcheck_response(response)\n\u001b[0;32m 345\u001b[0m response[\u001b[39m\"\u001b[39m\u001b[39mvalue\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_unwrap_value(response\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mvalue\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mNone\u001b[39;00m))\n\u001b[0;32m 346\u001b[0m \u001b[39mreturn\u001b[39;00m response\n",
|
||||
"File \u001b[1;32mc:\\Users\\glori\\anaconda3\\envs\\ml\\lib\\site-packages\\selenium\\webdriver\\remote\\errorhandler.py:229\u001b[0m, in \u001b[0;36mErrorHandler.check_response\u001b[1;34m(self, response)\u001b[0m\n\u001b[0;32m 227\u001b[0m alert_text \u001b[39m=\u001b[39m value[\u001b[39m\"\u001b[39m\u001b[39malert\u001b[39m\u001b[39m\"\u001b[39m]\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mtext\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m 228\u001b[0m \u001b[39mraise\u001b[39;00m exception_class(message, screen, stacktrace, alert_text) \u001b[39m# type: ignore[call-arg] # mypy is not smart enough here\u001b[39;00m\n\u001b[1;32m--> 229\u001b[0m \u001b[39mraise\u001b[39;00m exception_class(message, screen, stacktrace)\n",
|
||||
"\u001b[1;31mStaleElementReferenceException\u001b[0m: Message: stale element reference: stale element not found\n (Session info: headless chrome=116.0.5845.142); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#stale-element-reference-exception\nStacktrace:\n\tGetHandleVerifier [0x00DC37C3+48947]\n\t(No symbol) [0x00D58551]\n\t(No symbol) [0x00C5C92D]\n\t(No symbol) [0x00C5FD62]\n\t(No symbol) [0x00C6106A]\n\t(No symbol) [0x00C61110]\n\t(No symbol) [0x00C89B21]\n\t(No symbol) [0x00C89EFB]\n\t(No symbol) [0x00C82F61]\n\t(No symbol) [0x00CA50D4]\n\t(No symbol) [0x00C82EB6]\n\t(No symbol) [0x00CA53E4]\n\t(No symbol) [0x00CB75DA]\n\t(No symbol) [0x00CA4E86]\n\t(No symbol) [0x00C816C7]\n\t(No symbol) [0x00C8284D]\n\tGetHandleVerifier [0x0100FDF9+2458985]\n\tGetHandleVerifier [0x0105744F+2751423]\n\tGetHandleVerifier [0x01051361+2726609]\n\tGetHandleVerifier [0x00E40680+560624]\n\t(No symbol) [0x00D6238C]\n\t(No symbol) [0x00D5E268]\n\t(No symbol) [0x00D5E392]\n\t(No symbol) [0x00D510B7]\n\tBaseThreadInitThunk [0x77517D59+25]\n\tRtlInitializeExceptionChain [0x77D6B79B+107]\n\tRtlClearBits [0x77D6B71F+191]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -304,62 +317,66 @@
|
||||
"progress = Progress(0, scraper.max_tweets)\n",
|
||||
"progress.print_progress(0)\n",
|
||||
"\n",
|
||||
"while scraper.scroller.scrolling:\n",
|
||||
" scraper.get_tweets()\n",
|
||||
" \n",
|
||||
" for card in scraper.tweet_cards[-15:]:\n",
|
||||
" tweet_id = str(card)\n",
|
||||
" if tweet_id not in scraper.tweet_ids:\n",
|
||||
" scraper.tweet_ids.add(tweet_id)\n",
|
||||
" tweet = Tweet(card)\n",
|
||||
" if tweet:\n",
|
||||
" if not tweet.is_ad:\n",
|
||||
" scraper.data.append(tweet.tweet)\n",
|
||||
" progress.print_progress(len(scraper.data))\n",
|
||||
"try:\n",
|
||||
" while scraper.scroller.scrolling:\n",
|
||||
" scraper.get_tweets()\n",
|
||||
"\n",
|
||||
" if len(scraper.data) >= scraper.max_tweets:\n",
|
||||
" scraper.scroller.scrolling = False\n",
|
||||
" break\n",
|
||||
" \n",
|
||||
" if len(scraper.data) % 50 == 0:\n",
|
||||
" sleep(2)\n",
|
||||
" for card in scraper.tweet_cards[-15:]:\n",
|
||||
" tweet_id = str(card)\n",
|
||||
" if tweet_id not in scraper.tweet_ids:\n",
|
||||
" scraper.tweet_ids.add(tweet_id)\n",
|
||||
" tweet = Tweet(card)\n",
|
||||
" if tweet:\n",
|
||||
" if not tweet.is_ad:\n",
|
||||
" scraper.data.append(tweet.tweet)\n",
|
||||
" progress.print_progress(len(scraper.data))\n",
|
||||
"\n",
|
||||
" if len(scraper.data) >= scraper.max_tweets:\n",
|
||||
" break\n",
|
||||
" \n",
|
||||
" scraper.scroller.scroll_count = 0\n",
|
||||
" \n",
|
||||
" while True:\n",
|
||||
" scraper.driver.execute_script(\n",
|
||||
" 'window.scrollTo(0, document.body.scrollHeight);'\n",
|
||||
" )\n",
|
||||
" sleep(2)\n",
|
||||
" scraper.scroller.current_position = scraper.driver.execute_script(\n",
|
||||
" \"return window.pageYOffset;\"\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" if scraper.scroller.last_position == scraper.scroller.current_position:\n",
|
||||
" scraper.scroller.scroll_count += 1\n",
|
||||
" \n",
|
||||
" if scraper.scroller.scroll_count >= 3:\n",
|
||||
" scraper.go_to_home()\n",
|
||||
" sleep(2)\n",
|
||||
" scraper.scroller.reset()\n",
|
||||
" break\n",
|
||||
" else:\n",
|
||||
" sleep(2)\n",
|
||||
" else:\n",
|
||||
" scraper.scroller.last_position = scraper.scroller.current_position\n",
|
||||
" if len(scraper.data) >= scraper.max_tweets:\n",
|
||||
" scraper.scroller.scrolling = False\n",
|
||||
" break\n",
|
||||
"\n",
|
||||
" if len(scraper.data) % 50 == 0:\n",
|
||||
" sleep(2)\n",
|
||||
"\n",
|
||||
" if len(scraper.data) >= scraper.max_tweets:\n",
|
||||
" break\n",
|
||||
"\n",
|
||||
" scraper.scroller.scroll_count = 0\n",
|
||||
"\n",
|
||||
" while True:\n",
|
||||
" scraper.driver.execute_script(\n",
|
||||
" 'window.scrollTo(0, document.body.scrollHeight);'\n",
|
||||
" )\n",
|
||||
" sleep(2)\n",
|
||||
" scraper.scroller.current_position = scraper.driver.execute_script(\n",
|
||||
" \"return window.pageYOffset;\"\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" if scraper.scroller.last_position == scraper.scroller.current_position:\n",
|
||||
" scraper.scroller.scroll_count += 1\n",
|
||||
"\n",
|
||||
" if scraper.scroller.scroll_count >= 3:\n",
|
||||
" scraper.go_to_home()\n",
|
||||
" sleep(2)\n",
|
||||
" scraper.scroller.reset()\n",
|
||||
" break\n",
|
||||
" else:\n",
|
||||
" sleep(2)\n",
|
||||
" else:\n",
|
||||
" scraper.scroller.last_position = scraper.scroller.current_position\n",
|
||||
" break\n",
|
||||
"\n",
|
||||
" print(\"Scraping Complete\")\n",
|
||||
"except StaleElementReferenceException:\n",
|
||||
" print(\"Scraping Incomplete\")\n",
|
||||
"\n",
|
||||
"scraper.driver.close()\n",
|
||||
"print(\"Scraping Complete\")\n",
|
||||
"print(\"Tweets: {}\".format(len(scraper.data)))"
|
||||
"print(\"Tweets: {} out of {}\".format(len(scraper.data), scraper.max_tweets))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 58,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -385,7 +402,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 59,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -401,7 +418,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 60,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
||||
Reference in New Issue
Block a user