diff --git a/scraper/tweet.py b/scraper/tweet.py index 3cb1c44..1608ac5 100644 --- a/scraper/tweet.py +++ b/scraper/tweet.py @@ -81,6 +81,16 @@ class Tweet: except NoSuchElementException: self.profile_img = "" + try: + self.tags = card.find_elements( + "xpath", + './/a[contains(@href, "src=hashtag_click")]', + ) + + self.tags = [tag.text for tag in self.tags] + except NoSuchElementException: + self.tags = [] + self.tweet = ( self.user, self.handle, @@ -91,6 +101,7 @@ class Tweet: self.retweet_cnt, self.like_cnt, self.analytics_cnt, + self.tags, self.profile_img, ) diff --git a/scraper/twitter_scraper.py b/scraper/twitter_scraper.py index 3543da6..b367640 100644 --- a/scraper/twitter_scraper.py +++ b/scraper/twitter_scraper.py @@ -270,7 +270,8 @@ It may be due to the following: "Retweets": [tweet[6] for tweet in self.data], "Likes": [tweet[7] for tweet in self.data], "Analytics": [tweet[8] for tweet in self.data], - "Profile Image": [tweet[9] for tweet in self.data], + "Tags": [tweet[9] for tweet in self.data], + "Profile Image": [tweet[10] for tweet in self.data], } df = pd.DataFrame(data)