update Scraper Initialization
This commit is contained in:
41
main.ipynb
41
main.ipynb
@@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 103,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -11,11 +11,10 @@
|
||||
"import pandas as pd\n",
|
||||
"from datetime import datetime\n",
|
||||
"from fake_headers import Headers\n",
|
||||
"from getpass import getpass\n",
|
||||
"from time import sleep\n",
|
||||
"from selenium import webdriver\n",
|
||||
"from selenium.webdriver.common.keys import Keys\n",
|
||||
"from selenium.common.exceptions import NoSuchElementException, WebDriverException, StaleElementReferenceException\n",
|
||||
"from selenium.common.exceptions import NoSuchElementException, StaleElementReferenceException\n",
|
||||
"\n",
|
||||
"from selenium.webdriver.chrome.options import Options as ChromeOptions\n",
|
||||
"from selenium.webdriver.chrome.service import Service as ChromeService\n",
|
||||
@@ -35,7 +34,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 104,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -58,7 +57,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 105,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -80,7 +79,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 106,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -111,9 +110,13 @@
|
||||
"\n",
|
||||
" # For Hiding Browser\n",
|
||||
" browser_option.add_argument(\"--headless\")\n",
|
||||
" \n",
|
||||
" chromedriver_path=ChromeDriverManager().install()\n",
|
||||
" chrome_service = ChromeService(executable_path=chromedriver_path)\n",
|
||||
"\n",
|
||||
" driver = webdriver.Chrome(\n",
|
||||
" options=browser_option,\n",
|
||||
" service=chrome_service,\n",
|
||||
" options=browser_option,\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" return driver\n",
|
||||
@@ -135,7 +138,7 @@
|
||||
" \"//input[@autocomplete='username']\"\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" username.send_keys(USER_UNAME)\n",
|
||||
" username.send_keys(self.username)\n",
|
||||
" username.send_keys(Keys.RETURN)\n",
|
||||
" sleep(3)\n",
|
||||
"\n",
|
||||
@@ -151,7 +154,7 @@
|
||||
" \"xpath\",\n",
|
||||
" \"//input[@data-testid='ocfEnterTextTextInput']\"\n",
|
||||
" )\n",
|
||||
" unusual_activity.send_keys(USER_UNAME)\n",
|
||||
" unusual_activity.send_keys(self.username)\n",
|
||||
" unusual_activity.send_keys(Keys.RETURN)\n",
|
||||
" sleep(3)\n",
|
||||
" except NoSuchElementException:\n",
|
||||
@@ -165,7 +168,7 @@
|
||||
" \"//input[@autocomplete='current-password']\"\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" password.send_keys(USER_PASSWORD)\n",
|
||||
" password.send_keys(self.password)\n",
|
||||
" password.send_keys(Keys.RETURN)\n",
|
||||
" sleep(3)\n",
|
||||
"\n",
|
||||
@@ -190,7 +193,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 107,
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -283,16 +286,16 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 108,
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Progress: [[========================================]] 100.00% 10 of 10\n",
|
||||
"Scraping Complete\n",
|
||||
"Tweets: 10 out of 10\n"
|
||||
"Progress: [[=======---------------------------------]] 18.00% 9 of 50\n",
|
||||
"Scraping Incomplete\n",
|
||||
"Tweets: 9 out of 50\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -300,7 +303,7 @@
|
||||
"scraper = Twitter_Scraper(\n",
|
||||
" username=USER_UNAME,\n",
|
||||
" password=USER_PASSWORD,\n",
|
||||
" max_tweets=500\n",
|
||||
" max_tweets=50\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"scraper.go_to_home()\n",
|
||||
@@ -368,7 +371,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 109,
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -394,7 +397,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 110,
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -410,7 +413,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 111,
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
||||
Reference in New Issue
Block a user