Files
selenium-twitter-scraper/main.ipynb
2023-09-09 00:17:24 +08:00

464 lines
22 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import sys\n",
"import re\n",
"import pandas as pd\n",
"from fake_headers import Headers\n",
"from getpass import getpass\n",
"from time import sleep\n",
"from selenium import webdriver\n",
"from selenium.webdriver.common.keys import Keys\n",
"from selenium.common.exceptions import NoSuchElementException, WebDriverException, StaleElementReferenceException\n",
"\n",
"from selenium.webdriver.chrome.options import Options as ChromeOptions\n",
"from selenium.webdriver.chrome.service import Service as ChromeService\n",
"\n",
"from webdriver_manager.chrome import ChromeDriverManager\n",
"\n",
"# Credentials are read from the environment so they never appear in the notebook.\n",
"# When a variable is unset or empty, prompt for the value instead of failing with\n",
"# a bare KeyError; getpass keeps the password from being echoed.\n",
"USER_UNAME = os.environ.get('TWITTER_USERNAME') or input('Twitter username: ')\n",
"USER_PASSWORD = os.environ.get('TWITTER_PASSWORD') or getpass('Twitter password: ')\n",
"\n",
"# Entry point of the login flow opened by the scraper.\n",
"TWITTER_LOGIN_URL = \"https://twitter.com/i/flow/login\""
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [],
"source": [
"class Progress:\n",
"    \"\"\"Single-line text progress bar written to stdout.\n",
"\n",
"    Attributes:\n",
"        current: Count passed to the most recent update.\n",
"        total: Count that corresponds to 100%.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, current, total) -> None:\n",
"        self.current = current\n",
"        self.total = total\n",
"\n",
"    def print_progress(self, current) -> None:\n",
"        \"\"\"Redraw the bar for ``current`` completed items.\n",
"\n",
"        The line starts with a carriage return so each call overwrites the\n",
"        previous one. A blank line is printed once ``current`` equals ``total``.\n",
"\n",
"        Args:\n",
"            current: Number of items completed so far.\n",
"        \"\"\"\n",
"        self.current = current\n",
"        # A non-positive total would divide by zero; report it as complete.\n",
"        progress = current / self.total if self.total > 0 else 1.0\n",
"\n",
"        bar_length = 40\n",
"        # Clamp only the drawn portion, so an overshoot cannot widen the bar\n",
"        # while the printed percentage still shows the real ratio.\n",
"        filled = int(bar_length * min(max(progress, 0.0), 1.0))\n",
"        # The bar holds fill characters only; the brackets are supplied once,\n",
"        # by the format string below.\n",
"        progress_bar = \"=\" * filled + \"-\" * (bar_length - filled)\n",
"\n",
"        sys.stdout.write(\n",
"            \"\\rProgress: [{}] {:.2%} {} of {}\".format(progress_bar, progress, current, self.total))\n",
"        sys.stdout.flush()\n",
"        if current == self.total:\n",
"            print(\"\\n\")\n"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [],
"source": [
"class Scroller():\n",
"    \"\"\"Scroll bookkeeping for a page controlled through ``driver``.\n",
"\n",
"    Attributes:\n",
"        driver: Object whose ``execute_script`` is used to read the page offset.\n",
"        current_position: Offset recorded by the caller after a scroll.\n",
"        last_position: Offset read from the page at construction / last reset.\n",
"        scrolling: Flag the caller clears to stop its scroll loop.\n",
"        scroll_count: Caller-maintained count of scrolls that did not move.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, driver) -> None:\n",
"        self.driver = driver\n",
"        self.scrolling = True\n",
"        # Positions and the counter start in exactly the state reset() restores.\n",
"        self.reset()\n",
"\n",
"    def reset(self) -> None:\n",
"        \"\"\"Zero the counters and re-read the page's current vertical offset.\n",
"\n",
"        ``scrolling`` is left as it is.\n",
"        \"\"\"\n",
"        self.scroll_count = 0\n",
"        self.current_position = 0\n",
"        self.last_position = self.driver.execute_script(\"return window.pageYOffset;\")"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [],
"source": [
"class Twitter_Scraper():\n",
"    \"\"\"Headless Chrome session that logs in to Twitter and collects tweet cards.\n",
"\n",
"    Constructing an instance starts the browser and performs the login.\n",
"\n",
"    Args:\n",
"        username: Account name typed into the login form, and into the\n",
"            extra confirmation prompt when that prompt is shown.\n",
"        password: Account password typed into the login form.\n",
"        max_tweets: Stored on the instance for the caller's collection loop.\n",
"\n",
"    Attributes:\n",
"        data: List the caller fills with tweet tuples.\n",
"        tweet_ids: Set the caller uses to remember cards already handled.\n",
"        tweet_cards: Elements found by the latest ``get_tweets()`` call.\n",
"        driver: The Chrome WebDriver instance.\n",
"        scroller: ``Scroller`` bound to ``driver``.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, username, password, max_tweets=50):\n",
"        self.username = username\n",
"        self.password = password\n",
"        self.data = []\n",
"        self.tweet_ids = set()\n",
"        self.max_tweets = max_tweets\n",
"        self.tweet_cards = []\n",
"        self.driver = self._get_driver()\n",
"        self.scroller = Scroller(self.driver)\n",
"        self._login()\n",
"\n",
"    def _get_driver(self):\n",
"        \"\"\"Create a headless Chrome driver that sends a generated User-Agent.\"\"\"\n",
"        header = Headers().generate()['User-Agent']\n",
"\n",
"        browser_option = ChromeOptions()\n",
"        for argument in (\n",
"            '--no-sandbox',\n",
"            \"--disable-dev-shm-usage\",\n",
"            '--ignore-certificate-errors',\n",
"            '--disable-gpu',\n",
"            '--log-level=3',\n",
"            '--disable-notifications',\n",
"            '--disable-popup-blocking',\n",
"            '--user-agent={}'.format(header),\n",
"            # For Hiding Browser\n",
"            \"--headless\",\n",
"        ):\n",
"            browser_option.add_argument(argument)\n",
"\n",
"        driver = webdriver.Chrome(\n",
"            options=browser_option,\n",
"        )\n",
"\n",
"        return driver\n",
"\n",
"    def _login(self):\n",
"        \"\"\"Open the login flow and submit username, optional prompt, and password.\"\"\"\n",
"        self.driver.get(TWITTER_LOGIN_URL)\n",
"        self.driver.maximize_window()\n",
"        sleep(3)\n",
"\n",
"        self._input_username()\n",
"        self._input_unusual_activity()\n",
"        self._input_password()\n",
"\n",
"    def _submit_text(self, xpath, text):\n",
"        \"\"\"Type ``text`` into the element at ``xpath``, press Return, wait 3 s.\n",
"\n",
"        Raises:\n",
"            NoSuchElementException: when no element matches ``xpath``.\n",
"        \"\"\"\n",
"        field = self.driver.find_element(\"xpath\", xpath)\n",
"        field.send_keys(text)\n",
"        field.send_keys(Keys.RETURN)\n",
"        sleep(3)\n",
"\n",
"    def _abort(self, message):\n",
"        \"\"\"Print ``message``, shut the browser down, and raise ``SystemExit``.\"\"\"\n",
"        print(message)\n",
"        self.driver.quit()\n",
"        # sys.exit rather than the builtin exit(): exit() is a helper that the\n",
"        # site module installs for interactive shells, not for program code.\n",
"        sys.exit(1)\n",
"\n",
"    def _input_username(self):\n",
"        \"\"\"Submit the username given to the constructor; abort if the field is missing.\"\"\"\n",
"        try:\n",
"            # self.username, not the module-level constant, so the constructor\n",
"            # argument is the value that is actually used.\n",
"            self._submit_text(\"//input[@autocomplete='username']\", self.username)\n",
"        except NoSuchElementException:\n",
"            self._abort(\"Username field not found\")\n",
"\n",
"    def _input_unusual_activity(self):\n",
"        \"\"\"Answer the extra text prompt with the username when it is present.\n",
"\n",
"        The prompt does not always appear, so a missing field is not an error.\n",
"        \"\"\"\n",
"        try:\n",
"            self._submit_text(\"//input[@data-testid='ocfEnterTextTextInput']\", self.username)\n",
"        except NoSuchElementException:\n",
"            pass\n",
"\n",
"    def _input_password(self):\n",
"        \"\"\"Submit the password given to the constructor; abort if the field is missing.\"\"\"\n",
"        try:\n",
"            self._submit_text(\"//input[@autocomplete='current-password']\", self.password)\n",
"        except NoSuchElementException:\n",
"            self._abort(\"Password field not found\")\n",
"\n",
"    def go_to_home(self):\n",
"        \"\"\"Load the home timeline and wait 3 s.\"\"\"\n",
"        self.driver.get(\"https://twitter.com/home\")\n",
"        sleep(3)\n",
"\n",
"    def get_tweets(self):\n",
"        \"\"\"Store every tweet article currently on the page in ``tweet_cards``.\"\"\"\n",
"        self.tweet_cards = self.driver.find_elements(\n",
"            'xpath',\n",
"            '//article[@data-testid=\"tweet\"]'\n",
"        )"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [],
"source": [
"class Tweet:\n",
"    \"\"\"Fields extracted from one tweet card element.\n",
"\n",
"    Every attribute below exists after construction, even when parsing stops\n",
"    early because the card lacks a user name, a handle, or a timestamp.\n",
"\n",
"    Args:\n",
"        card: Element exposing ``find_element`` / ``find_elements``.\n",
"\n",
"    Attributes:\n",
"        is_ad: ``False`` only when a ``<time>`` element with a ``datetime``\n",
"            attribute was found; otherwise ``True`` and the card should be\n",
"            skipped.\n",
"        tweet: ``(user, handle, date_time, verified, content, reply_cnt,\n",
"            retweet_cnt, like_cnt)``, or ``None`` for a skipped card.\n",
"\n",
"    Raises:\n",
"        StaleElementReferenceException: not handled here; it propagates if\n",
"            the card is detached from the page while it is being read.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, card) -> None:\n",
"        self.card = card\n",
"\n",
"        # Defaults first, so no attribute is missing on an early return.\n",
"        self.user = None\n",
"        self.handle = None\n",
"        self.date_time = None\n",
"        self.verified = False\n",
"        self.content = \"\"\n",
"        self.reply_cnt = '0'\n",
"        self.retweet_cnt = '0'\n",
"        self.like_cnt = '0'\n",
"        self.tweet = None\n",
"        # Treated as an ad / unusable card until a timestamp proves otherwise.\n",
"        self.is_ad = True\n",
"\n",
"        try:\n",
"            self.user = card.find_element(\n",
"                'xpath',\n",
"                './/div[@data-testid=\"User-Name\"]//span'\n",
"            ).text\n",
"            self.handle = card.find_element(\n",
"                'xpath',\n",
"                './/span[contains(text(), \"@\")]'\n",
"            ).text\n",
"            self.date_time = card.find_element(\n",
"                'xpath',\n",
"                './/time'\n",
"            ).get_attribute('datetime')\n",
"        except NoSuchElementException:\n",
"            return\n",
"\n",
"        if self.date_time is None:\n",
"            return\n",
"        self.is_ad = False\n",
"\n",
"        self.verified = self._has_element(\n",
"            './/*[local-name()=\"svg\" and @data-testid=\"icon-verified\"]'\n",
"        )\n",
"\n",
"        # The text is split across span and link children; join them in page order.\n",
"        text_parts = card.find_elements(\n",
"            'xpath',\n",
"            './/div[@data-testid=\"tweetText\"]/span | .//div[@data-testid=\"tweetText\"]/a'\n",
"        )\n",
"        self.content = \"\".join(part.text for part in text_parts)\n",
"\n",
"        self.reply_cnt = self._counter_text('reply')\n",
"        self.retweet_cnt = self._counter_text('retweet')\n",
"        self.like_cnt = self._counter_text('like')\n",
"\n",
"        self.tweet = (\n",
"            self.user,\n",
"            self.handle,\n",
"            self.date_time,\n",
"            self.verified,\n",
"            self.content,\n",
"            self.reply_cnt,\n",
"            self.retweet_cnt,\n",
"            self.like_cnt\n",
"        )\n",
"\n",
"    def _has_element(self, xpath) -> bool:\n",
"        \"\"\"Return True when ``xpath`` matches something inside the card.\"\"\"\n",
"        try:\n",
"            self.card.find_element('xpath', xpath)\n",
"        except NoSuchElementException:\n",
"            return False\n",
"        return True\n",
"\n",
"    def _counter_text(self, testid) -> str:\n",
"        \"\"\"Return the text of the counter under ``data-testid=testid``, or '0' if absent.\"\"\"\n",
"        try:\n",
"            return self.card.find_element(\n",
"                'xpath',\n",
"                './/div[@data-testid=\"{}\"]//span'.format(testid)\n",
"            ).text\n",
"        except NoSuchElementException:\n",
"            return '0'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Log in, then alternate between reading the newest cards and scrolling until\n",
"# max_tweets tweets have been collected into scraper.data.\n",
"scraper = Twitter_Scraper(\n",
"    username=USER_UNAME,\n",
"    password=USER_PASSWORD,\n",
"    max_tweets=500\n",
")\n",
"\n",
"scraper.go_to_home()\n",
"progress = Progress(0, scraper.max_tweets)\n",
"progress.print_progress(0)\n",
"\n",
"skipped_cards = 0  # cards that could not be read (stale or missing elements)\n",
"\n",
"try:\n",
"    while scraper.scroller.scrolling:\n",
"        scraper.get_tweets()\n",
"\n",
"        # Only the last 15 cards found are examined on each pass.\n",
"        for card in scraper.tweet_cards[-15:]:\n",
"            tweet_id = str(card)\n",
"            if tweet_id in scraper.tweet_ids:\n",
"                continue\n",
"            scraper.tweet_ids.add(tweet_id)\n",
"\n",
"            try:\n",
"                tweet = Tweet(card)\n",
"            except (StaleElementReferenceException, NoSuchElementException):\n",
"                # One unreadable card must not abort the whole run; skip it.\n",
"                skipped_cards += 1\n",
"                continue\n",
"\n",
"            # getattr: Tweet can return early without defining these attributes.\n",
"            if getattr(tweet, 'is_ad', True) or getattr(tweet, 'tweet', None) is None:\n",
"                continue\n",
"\n",
"            scraper.data.append(tweet.tweet)\n",
"            progress.print_progress(len(scraper.data))\n",
"\n",
"            if len(scraper.data) >= scraper.max_tweets:\n",
"                scraper.scroller.scrolling = False\n",
"                break\n",
"\n",
"            # Pause after every 50th collected tweet (checked only right after\n",
"            # an append, so skipped cards do not trigger repeated pauses).\n",
"            if len(scraper.data) % 50 == 0:\n",
"                sleep(2)\n",
"\n",
"        if len(scraper.data) >= scraper.max_tweets:\n",
"            break\n",
"\n",
"        scraper.scroller.scroll_count = 0\n",
"\n",
"        # Scroll to the bottom; if the offset does not change three times in a\n",
"        # row, reload the home timeline and start over from the top.\n",
"        while True:\n",
"            scraper.driver.execute_script(\n",
"                'window.scrollTo(0, document.body.scrollHeight);'\n",
"            )\n",
"            sleep(2)\n",
"            scraper.scroller.current_position = scraper.driver.execute_script(\n",
"                \"return window.pageYOffset;\"\n",
"            )\n",
"\n",
"            if scraper.scroller.last_position == scraper.scroller.current_position:\n",
"                scraper.scroller.scroll_count += 1\n",
"\n",
"                if scraper.scroller.scroll_count >= 3:\n",
"                    scraper.go_to_home()\n",
"                    sleep(2)\n",
"                    scraper.scroller.reset()\n",
"                    break\n",
"                else:\n",
"                    sleep(2)\n",
"            else:\n",
"                scraper.scroller.last_position = scraper.scroller.current_position\n",
"                break\n",
"\n",
"    print(\"Scraping Complete\")\n",
"except StaleElementReferenceException:\n",
"    print(\"Scraping Incomplete\")\n",
"finally:\n",
"    # quit() ends the WebDriver session; it runs on every exit path so the\n",
"    # browser is not left behind when an unexpected error escapes the loop.\n",
"    scraper.driver.quit()\n",
"\n",
"if skipped_cards:\n",
"    print(\"Skipped unreadable cards: {}\".format(skipped_cards))\n",
"print(\"Tweets: {} out of {}\".format(len(scraper.data), scraper.max_tweets))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# import tabulate\n",
"\n",
"# # Tabulate\n",
"# print(tabulate.tabulate(\n",
"# scraper.data[:10],\n",
"# headers=[\n",
"# 'Name',\n",
"# 'Handle',\n",
"# 'Date Time',\n",
"# 'Verified',\n",
"# 'Content',\n",
"# 'Reply Count',\n",
"# 'Retweet Count',\n",
"# 'Like Count'\n",
"# ],\n",
"# tablefmt='tsv'\n",
"# ))\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# import csv\n",
"# \n",
"# with open('twitter_tweets.csv', 'w', encoding='utf-8', newline='') as f:\n",
"# header = ['Name', 'Handle', 'Timestamp', 'Verified',\n",
"# 'Content', 'Comments', 'Retweets', 'Likes']\n",
"# writer = csv.writer(f)\n",
"# writer.writerow(header)\n",
"# writer.writerows(scraper.data)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Turn the collected tuples into one column per field, in tuple order, and\n",
"# write the table to twitter_tweets.csv without the index column.\n",
"data = {\n",
"    column: [record[position] for record in scraper.data]\n",
"    for position, column in enumerate((\n",
"        'Name',\n",
"        'Handle',\n",
"        'Timestamp',\n",
"        'Verified',\n",
"        'Content',\n",
"        'Comments',\n",
"        'Retweets',\n",
"        'Likes',\n",
"    ))\n",
"}\n",
"\n",
"df = pd.DataFrame(data)\n",
"df.to_csv('twitter_tweets.csv', index=False)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "ml",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}