feat: Added choices for headless yes or no, along with user-agent related changes. (#27 from ReptilianPride/browser-options-update)

Added a yes/no option for running the browser in headless mode, along with user-agent related changes.
This commit is contained in:
Jarrian Gojar
2025-03-02 15:43:39 +08:00
committed by GitHub
5 changed files with 31 additions and 4 deletions

View File

@@ -1,2 +1,3 @@
TWITTER_USERNAME=# Your Twitter Handle
TWITTER_PASSWORD=# Your Twitter Password
HEADLESS=# Headless browser option (use "yes" or "no")

1
sample-command.txt Normal file
View File

@@ -0,0 +1 @@
python scraper --query='("NVDA" OR "nvidia") lang:en until:2024-01-19 since:2024-01-18' -t 5000 --top

View File

@@ -44,6 +44,13 @@ def main():
default=os.getenv("TWITTER_PASSWORD"),
help="Your Twitter password.",
)
parser.add_argument(
"--headlessState",
type=str,
default=os.getenv("HEADLESS"),
help="Headless mode? [yes/no]"
)
except Exception as e:
print(f"Error retrieving environment variables: {e}")
sys.exit(1)
@@ -113,6 +120,7 @@ def main():
USER_MAIL = args.mail
USER_UNAME = args.user
USER_PASSWORD = args.password
HEADLESS_MODE= args.headlessState
if USER_UNAME is None:
USER_UNAME = input("Twitter Username: ")
@@ -120,6 +128,9 @@ def main():
if USER_PASSWORD is None:
USER_PASSWORD = getpass.getpass("Enter Password: ")
if HEADLESS_MODE is None:
HEADLESS_MODE - str(input("Headless?[Yes/No]")).lower()
print()
tweet_type_args = []
@@ -146,6 +157,7 @@ def main():
mail=USER_MAIL,
username=USER_UNAME,
password=USER_PASSWORD,
headlessState=HEADLESS_MODE
)
scraper.login()
scraper.scrape_tweets(

View File

@@ -1,3 +1,7 @@
import time
import random
class Scroller:
def __init__(self, driver) -> None:
self.driver = driver

View File

@@ -37,6 +37,7 @@ class Twitter_Scraper:
mail,
username,
password,
headlessState,
max_tweets=50,
scrape_username=None,
scrape_hashtag=None,
@@ -50,6 +51,7 @@ class Twitter_Scraper:
self.mail = mail
self.username = username
self.password = password
self.headlessState = headlessState
self.interrupted = False
self.tweet_ids = set()
self.data = []
@@ -125,7 +127,10 @@ class Twitter_Scraper:
proxy=None,
):
print("Setup WebDriver...")
header = Headers().generate()["User-Agent"]
# header = Headers().generate()["User-Agent"]
# User agent of an Android smartphone device
header="Mozilla/5.0 (Linux; Android 11; SM-G998B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.5414.87 Mobile Safari/537.36"
# browser_option = ChromeOptions()
browser_option = FirefoxOptions()
@@ -140,6 +145,9 @@ class Twitter_Scraper:
if proxy is not None:
browser_option.add_argument("--proxy-server=%s" % proxy)
# Option to hide the browser window or not
# Any value other than 'yes' skips headless mode
if self.headlessState == 'yes':
# For Hiding Browser
browser_option.add_argument("--headless")
@@ -191,6 +199,7 @@ class Twitter_Scraper:
try:
self.driver.maximize_window()
self.driver.execute_script("document.body.style.zoom='150%'") #set zoom to 150%
self.driver.get(TWITTER_LOGIN_URL)
sleep(3)
@@ -490,7 +499,7 @@ It may be due to the following:
retry_button = self.driver.find_element(
"xpath", "//span[text()='Retry']/../../..")
self.progress.print_progress(len(self.data), True, retry_cnt, no_tweets_limit)
sleep(58)
sleep(600)
retry_button.click()
retry_cnt += 1
sleep(2)