#!/usr/bin/env python3
"""Download daily board history from 4stats.io, detrend it and compare seasons.

The script fetches the per-day history for one board, removes the long-term
linear trend from the posts-per-day series and then reports/plots the
activity inside a set of named date ranges ("seasons").
"""

import json
from datetime import datetime, timezone

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from scipy.stats import linregress


class FourChanAnalyzer:
    """Fetch, detrend and visualise daily post counts for a board.

    Intended call order: ``fetch_data()`` -> ``filter_and_normalize()`` ->
    ``get_season_stats()`` / ``print_summary()`` / ``plot_*()``.
    """

    # Line colour used for each season in the plots. A season that has no
    # entry here is drawn with matplotlib's default colour cycle.
    SEASON_COLORS = {
        "Season 1": "blue",
        "Season 2": "orange",
        "Season 3": "green",
        "Season 4": "red",
        "Bloodgames-Bitchtank": "purple",
    }

    def __init__(self):
        # Raw rows exactly as decoded from the API response (None until fetched).
        self.data = None
        # Processed DataFrame built by filter_and_normalize() (None until then).
        self.df = None
        # Board the current data belongs to; updated by fetch_data().
        self.board = "tv"
        # Season name -> (first day, last day), both "YYYY-MM-DD", inclusive.
        self.seasons = {
            "Season 1": ("2023-04-18", "2023-05-31"),
            "Season 2": ("2023-12-18", "2024-01-28"),
            "Season 3": ("2024-10-27", "2024-12-08"),
            "Season 4": ("2025-06-13", "2025-06-28"),
            "Bloodgames-Bitchtank": ("2024-06-24", "2024-08-02"),
        }

    def _season_ranges(self):
        """Return ``{name: (start, end)}`` with the season bounds parsed to datetimes."""
        return {
            name: (
                datetime.strptime(start, "%Y-%m-%d"),
                datetime.strptime(end, "%Y-%m-%d"),
            )
            for name, (start, end) in self.seasons.items()
        }

    def _season_slice(self, start_date, end_date):
        """Return the rows of ``self.df`` dated from start_date through end_date inclusive."""
        # Compare against the day *after* end_date so that records stamped
        # later than midnight on the final day are still part of the season.
        end_exclusive = pd.Timestamp(end_date) + pd.Timedelta(days=1)
        dates = self.df["Date"]
        return self.df[(dates >= start_date) & (dates < end_exclusive)]

    def fetch_data(self, board="tv"):
        """Fetch the daily history for ``board`` from the 4stats.io API.

        On success the decoded JSON is stored in ``self.data`` and the board
        name in ``self.board``.

        Returns:
            True when the download and JSON decoding succeeded, False
            otherwise (the error is printed, not raised).
        """
        print(f"📡 Fetching data from 4stats.io for /{board}/...")

        headers = {
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:139.0) Gecko/20100101 Firefox/139.0",
            "Accept": "application/json, text/plain, */*",
            # Accept-Encoding is deliberately not set here: requests fills it
            # in with the encodings it can actually decode. Advertising
            # "br"/"zstd" by hand yields an undecodable body when the optional
            # brotli/zstandard packages are not installed.
            "Origin": "https://4stats.io",
            "DNT": "1",
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Cache-Control": "no-cache",
        }
        url = f"https://api.4stats.io/history/day/{board}"

        try:
            response = requests.get(url, headers=headers, timeout=30)
            response.raise_for_status()
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on requests versions whose
            # JSON decode error is not a RequestException subclass.
            print(f"❌ Failed to fetch data: {e}")
            return False

        self.data = data
        self.board = board
        print(f"✅ Downloaded {len(self.data)} records.")
        return True

    def filter_and_normalize(self, start_year=2023):
        """Keep rows from ``start_year`` onward and detrend posts per day.

        Each raw row is indexed positionally: ``row[0]`` is an epoch timestamp
        in milliseconds and ``row[2]`` the posts-per-day value; ``row[1]`` and
        ``row[3]`` are carried through unchanged as ``Column_2``/``Column_4``.
        The linear trend fitted over the kept rows is subtracted and the
        series re-centred on its original mean.

        Returns:
            The processed DataFrame (also stored in ``self.df``) with columns
            ``Timestamp``, ``Column_2``, ``Posts_Per_Day``, ``Column_4`` and
            ``Date``.

        Raises:
            ValueError: if no data has been fetched, or if nothing is left
                after dropping rows with a missing timestamp or post count.
        """
        if not self.data:
            raise ValueError("No data available. Run fetch_data() first.")

        # Epoch timestamps are UTC-based, and the Date column below is derived
        # from them without any timezone shift, so the cutoff must be UTC too.
        cutoff = datetime(start_year, 1, 1, tzinfo=timezone.utc).timestamp() * 1000

        # Rows with a missing timestamp cannot be compared against the cutoff;
        # let them through here so the cleaning step below removes and counts
        # them instead of the comparison raising TypeError.
        filtered_data = [
            row for row in self.data if row[0] is None or row[0] >= cutoff
        ]
        print(f"✂️ Trimmed to {len(filtered_data)} records from {start_year} onward.")

        # Clean data - remove rows with None values in critical columns
        clean_data = [
            row for row in filtered_data
            if row[0] is not None and row[2] is not None
        ]
        removed = len(filtered_data) - len(clean_data)
        if removed:
            print(f"🧹 Removed {removed} rows with missing data.")
        if not clean_data:
            raise ValueError("No valid data remaining after cleaning.")

        timestamps = np.array([row[0] for row in clean_data])
        posts_per_day = np.array([row[2] for row in clean_data], dtype=float)

        if np.unique(timestamps).size < 2:
            # A regression line needs at least two distinct x values.
            print("⚠️ Not enough data points to fit a trend; values left unchanged.")
            adjusted_posts_per_day = posts_per_day
        else:
            # Remove long-term trend using linear regression, then add the
            # mean back so the values stay on the original scale.
            fit = linregress(timestamps, posts_per_day)
            trendline = fit.slope * timestamps + fit.intercept
            mean_value = posts_per_day.mean()
            adjusted_posts_per_day = posts_per_day - (trendline - mean_value)

            print(f"📉 Detrended data - R²: {fit.rvalue**2:.4f}, p-value: {fit.pvalue:.4e}")
            # Slope is per millisecond; 86400000 ms = 1 day.
            print(f"📈 Trend slope: {fit.slope*86400000:.2f} posts/day per day")

        adjusted_data = [
            [row[0], row[1], float(round(adj, 2)), row[3]]
            for row, adj in zip(clean_data, adjusted_posts_per_day)
        ]

        self.df = pd.DataFrame(
            adjusted_data,
            columns=["Timestamp", "Column_2", "Posts_Per_Day", "Column_4"],
        )
        self.df["Date"] = pd.to_datetime(self.df["Timestamp"], unit="ms")
        return self.df

    def get_season_stats(self):
        """Calculate summary statistics of ``Posts_Per_Day`` for each season.

        Returns:
            ``{season_name: {"mean", "std", "max", "min", "days",
            "start_date", "end_date"}}``. Seasons with no rows in the
            processed data are omitted.

        Raises:
            ValueError: if filter_and_normalize() has not been run.
        """
        if self.df is None:
            raise ValueError("No processed data available. Run filter_and_normalize() first.")

        stats = {}
        for season_name, (start_date, end_date) in self._season_ranges().items():
            season_df = self._season_slice(start_date, end_date)
            if season_df.empty:
                continue
            posts = season_df["Posts_Per_Day"]
            stats[season_name] = {
                "mean": posts.mean(),
                "std": posts.std(),
                "max": posts.max(),
                "min": posts.min(),
                "days": len(season_df),
                "start_date": start_date,
                "end_date": end_date,
            }
        return stats

    def plot_seasons_comparison(self, max_days=42, figsize=(12, 8)):
        """Plot every season on a shared "day of season" axis.

        Args:
            max_days: only the first ``max_days`` days of each season are drawn.
            figsize: figure size passed to matplotlib.

        Returns:
            The matplotlib Figure that was drawn.

        Raises:
            ValueError: if filter_and_normalize() has not been run.
        """
        if self.df is None:
            raise ValueError("No processed data available. Run filter_and_normalize() first.")

        print(f"📊 Creating comparison graph (max {max_days} days)...")

        fig, ax = plt.subplots(figsize=figsize)

        for season_name, (start_date, end_date) in self._season_ranges().items():
            # Copy because a Day_Index column is added to the slice below.
            season_df = self._season_slice(start_date, end_date).copy()
            if season_df.empty:
                print(f"⚠️ No data found for {season_name}")
                continue

            # Day 1 is the season's start date.
            season_df["Day_Index"] = (season_df["Date"] - start_date).dt.days + 1
            season_df = season_df[season_df["Day_Index"] <= max_days]

            ax.plot(
                season_df["Day_Index"],
                season_df["Posts_Per_Day"],
                label=f"{season_name} (n={len(season_df)})",
                color=self.SEASON_COLORS.get(season_name),
                linewidth=2,
                alpha=0.8,
            )

        ax.set_xlabel("Days from Season Start", fontsize=12)
        ax.set_ylabel("Normalized Posts Per Day", fontsize=12)
        ax.set_title(
            f"4chan /{self.board}/ Activity Across Seasons\n(Detrended and Normalized)",
            fontsize=14,
            pad=20,
        )
        ax.legend(loc="upper right")
        ax.grid(True, alpha=0.3)
        fig.tight_layout()
        return fig

    def plot_full_timeline(self, figsize=(15, 8)):
        """Plot the whole processed series with each season highlighted.

        Args:
            figsize: figure size passed to matplotlib.

        Returns:
            The matplotlib Figure that was drawn.

        Raises:
            ValueError: if filter_and_normalize() has not been run.
        """
        if self.df is None:
            raise ValueError("No processed data available. Run filter_and_normalize() first.")

        fig, ax = plt.subplots(figsize=figsize)

        # Full series as a faint background line.
        ax.plot(
            self.df["Date"],
            self.df["Posts_Per_Day"],
            color="lightgray",
            alpha=0.7,
            linewidth=1,
        )

        # Overlay each season in its own colour.
        for season_name, (start_date, end_date) in self._season_ranges().items():
            season_df = self._season_slice(start_date, end_date)
            if season_df.empty:
                continue
            ax.plot(
                season_df["Date"],
                season_df["Posts_Per_Day"],
                color=self.SEASON_COLORS.get(season_name),
                linewidth=3,
                alpha=0.8,
                label=season_name,
            )

        ax.set_xlabel("Date", fontsize=12)
        ax.set_ylabel("Normalized Posts Per Day", fontsize=12)
        ax.set_title(
            f"4chan /{self.board}/ Activity Timeline with Season Highlights",
            fontsize=14,
            pad=20,
        )
        ax.legend()
        ax.grid(True, alpha=0.3)
        ax.tick_params(axis="x", labelrotation=45)
        fig.tight_layout()
        return fig

    def print_summary(self):
        """Print the per-season statistics from get_season_stats()."""
        stats = self.get_season_stats()

        print("\n📊 SEASON SUMMARY STATISTICS")
        print("=" * 50)

        for season_name, season_stats in stats.items():
            first_day = season_stats["start_date"].strftime("%Y-%m-%d")
            last_day = season_stats["end_date"].strftime("%Y-%m-%d")
            print(f"\n{season_name}:")
            print(f" Duration: {season_stats['days']} days")
            print(f" Mean posts/day: {season_stats['mean']:.1f}")
            print(f" Std deviation: {season_stats['std']:.1f}")
            print(f" Range: {season_stats['min']:.1f} - {season_stats['max']:.1f}")
            print(f" Dates: {first_day} to {last_day}")


def main():
    """Fetch, process, summarise and plot the data.

    Returns:
        0 on success, 1 when the download failed, for use as the process
        exit status.
    """
    analyzer = FourChanAnalyzer()

    # Fetch and process data
    if not analyzer.fetch_data():
        return 1

    analyzer.filter_and_normalize()
    analyzer.print_summary()

    # Create visualizations; each show() blocks until its window is closed.
    analyzer.plot_seasons_comparison()
    plt.show()

    analyzer.plot_full_timeline()
    plt.show()
    return 0


if __name__ == "__main__":
    raise SystemExit(main())