Update scraper initialization

This commit is contained in:
Jarrian
2023-09-09 09:50:43 +08:00
parent 7dce277488
commit e4e46c9802

View File

@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 103,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
@@ -11,11 +11,10 @@
"import pandas as pd\n",
"from datetime import datetime\n",
"from fake_headers import Headers\n",
"from getpass import getpass\n",
"from time import sleep\n",
"from selenium import webdriver\n",
"from selenium.webdriver.common.keys import Keys\n",
"from selenium.common.exceptions import NoSuchElementException, WebDriverException, StaleElementReferenceException\n",
"from selenium.common.exceptions import NoSuchElementException, StaleElementReferenceException\n",
"\n",
"from selenium.webdriver.chrome.options import Options as ChromeOptions\n",
"from selenium.webdriver.chrome.service import Service as ChromeService\n",
@@ -35,7 +34,7 @@
},
{
"cell_type": "code",
"execution_count": 104,
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
@@ -58,7 +57,7 @@
},
{
"cell_type": "code",
"execution_count": 105,
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
@@ -80,7 +79,7 @@
},
{
"cell_type": "code",
"execution_count": 106,
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
@@ -111,9 +110,13 @@
"\n",
" # For Hiding Browser\n",
" browser_option.add_argument(\"--headless\")\n",
" \n",
" chromedriver_path=ChromeDriverManager().install()\n",
" chrome_service = ChromeService(executable_path=chromedriver_path)\n",
"\n",
" driver = webdriver.Chrome(\n",
" options=browser_option,\n",
" service=chrome_service,\n",
" options=browser_option,\n",
" )\n",
" \n",
" return driver\n",
@@ -135,7 +138,7 @@
" \"//input[@autocomplete='username']\"\n",
" )\n",
"\n",
" username.send_keys(USER_UNAME)\n",
" username.send_keys(self.username)\n",
" username.send_keys(Keys.RETURN)\n",
" sleep(3)\n",
"\n",
@@ -151,7 +154,7 @@
" \"xpath\",\n",
" \"//input[@data-testid='ocfEnterTextTextInput']\"\n",
" )\n",
" unusual_activity.send_keys(USER_UNAME)\n",
" unusual_activity.send_keys(self.username)\n",
" unusual_activity.send_keys(Keys.RETURN)\n",
" sleep(3)\n",
" except NoSuchElementException:\n",
@@ -165,7 +168,7 @@
" \"//input[@autocomplete='current-password']\"\n",
" )\n",
"\n",
" password.send_keys(USER_PASSWORD)\n",
" password.send_keys(self.password)\n",
" password.send_keys(Keys.RETURN)\n",
" sleep(3)\n",
"\n",
@@ -190,7 +193,7 @@
},
{
"cell_type": "code",
"execution_count": 107,
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
@@ -283,16 +286,16 @@
},
{
"cell_type": "code",
"execution_count": 108,
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Progress: [[========================================]] 100.00% 10 of 10\n",
"Scraping Complete\n",
"Tweets: 10 out of 10\n"
"Progress: [[=======---------------------------------]] 18.00% 9 of 50\n",
"Scraping Incomplete\n",
"Tweets: 9 out of 50\n"
]
}
],
@@ -300,7 +303,7 @@
"scraper = Twitter_Scraper(\n",
" username=USER_UNAME,\n",
" password=USER_PASSWORD,\n",
" max_tweets=500\n",
" max_tweets=50\n",
")\n",
"\n",
"scraper.go_to_home()\n",
@@ -368,7 +371,7 @@
},
{
"cell_type": "code",
"execution_count": 109,
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
@@ -394,7 +397,7 @@
},
{
"cell_type": "code",
"execution_count": 110,
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
@@ -410,7 +413,7 @@
},
{
"cell_type": "code",
"execution_count": 111,
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [