update Scraper Initialization

This commit is contained in:
Jarrian
2023-09-09 09:50:43 +08:00
parent 7dce277488
commit e4e46c9802

View File

@@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 103, "execution_count": 10,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -11,11 +11,10 @@
"import pandas as pd\n", "import pandas as pd\n",
"from datetime import datetime\n", "from datetime import datetime\n",
"from fake_headers import Headers\n", "from fake_headers import Headers\n",
"from getpass import getpass\n",
"from time import sleep\n", "from time import sleep\n",
"from selenium import webdriver\n", "from selenium import webdriver\n",
"from selenium.webdriver.common.keys import Keys\n", "from selenium.webdriver.common.keys import Keys\n",
"from selenium.common.exceptions import NoSuchElementException, WebDriverException, StaleElementReferenceException\n", "from selenium.common.exceptions import NoSuchElementException, StaleElementReferenceException\n",
"\n", "\n",
"from selenium.webdriver.chrome.options import Options as ChromeOptions\n", "from selenium.webdriver.chrome.options import Options as ChromeOptions\n",
"from selenium.webdriver.chrome.service import Service as ChromeService\n", "from selenium.webdriver.chrome.service import Service as ChromeService\n",
@@ -35,7 +34,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 104, "execution_count": 11,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -58,7 +57,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 105, "execution_count": 12,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -80,7 +79,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 106, "execution_count": 13,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -111,9 +110,13 @@
"\n", "\n",
" # For Hiding Browser\n", " # For Hiding Browser\n",
" browser_option.add_argument(\"--headless\")\n", " browser_option.add_argument(\"--headless\")\n",
" \n",
" chromedriver_path=ChromeDriverManager().install()\n",
" chrome_service = ChromeService(executable_path=chromedriver_path)\n",
"\n", "\n",
" driver = webdriver.Chrome(\n", " driver = webdriver.Chrome(\n",
" options=browser_option,\n", " service=chrome_service,\n",
" options=browser_option,\n",
" )\n", " )\n",
" \n", " \n",
" return driver\n", " return driver\n",
@@ -135,7 +138,7 @@
" \"//input[@autocomplete='username']\"\n", " \"//input[@autocomplete='username']\"\n",
" )\n", " )\n",
"\n", "\n",
" username.send_keys(USER_UNAME)\n", " username.send_keys(self.username)\n",
" username.send_keys(Keys.RETURN)\n", " username.send_keys(Keys.RETURN)\n",
" sleep(3)\n", " sleep(3)\n",
"\n", "\n",
@@ -151,7 +154,7 @@
" \"xpath\",\n", " \"xpath\",\n",
" \"//input[@data-testid='ocfEnterTextTextInput']\"\n", " \"//input[@data-testid='ocfEnterTextTextInput']\"\n",
" )\n", " )\n",
" unusual_activity.send_keys(USER_UNAME)\n", " unusual_activity.send_keys(self.username)\n",
" unusual_activity.send_keys(Keys.RETURN)\n", " unusual_activity.send_keys(Keys.RETURN)\n",
" sleep(3)\n", " sleep(3)\n",
" except NoSuchElementException:\n", " except NoSuchElementException:\n",
@@ -165,7 +168,7 @@
" \"//input[@autocomplete='current-password']\"\n", " \"//input[@autocomplete='current-password']\"\n",
" )\n", " )\n",
"\n", "\n",
" password.send_keys(USER_PASSWORD)\n", " password.send_keys(self.password)\n",
" password.send_keys(Keys.RETURN)\n", " password.send_keys(Keys.RETURN)\n",
" sleep(3)\n", " sleep(3)\n",
"\n", "\n",
@@ -190,7 +193,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 107, "execution_count": 14,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -283,16 +286,16 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 108, "execution_count": 15,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Progress: [[========================================]] 100.00% 10 of 10\n", "Progress: [[=======---------------------------------]] 18.00% 9 of 50\n",
"Scraping Complete\n", "Scraping Incomplete\n",
"Tweets: 10 out of 10\n" "Tweets: 9 out of 50\n"
] ]
} }
], ],
@@ -300,7 +303,7 @@
"scraper = Twitter_Scraper(\n", "scraper = Twitter_Scraper(\n",
" username=USER_UNAME,\n", " username=USER_UNAME,\n",
" password=USER_PASSWORD,\n", " password=USER_PASSWORD,\n",
" max_tweets=500\n", " max_tweets=50\n",
")\n", ")\n",
"\n", "\n",
"scraper.go_to_home()\n", "scraper.go_to_home()\n",
@@ -368,7 +371,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 109, "execution_count": 16,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -394,7 +397,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 110, "execution_count": 17,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -410,7 +413,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 111, "execution_count": 18,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [