#!/usr/bin/env python3
# Provenance: added as "enhanced_4chan_analysis(3).py" in commit
# f83980a2a3b46959825873bc504cf055d12f8dd9
# (Author: Salastil, Fri Dec 19 23:21:17 2025 -0500, message: Upload files to "/").
"""Detrend 4stats.io daily board history and compare activity across seasons.

The script downloads the per-day history for a board, removes the long-term
linear trend from the posts-per-day series, prints per-season statistics and
draws two matplotlib figures (season overlay and full timeline).
"""

import json
from datetime import datetime, timezone

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from scipy.stats import linregress


class FourChanAnalyzer:
    """Fetch, detrend and plot posts-per-day history for one board.

    Attributes:
        data: Raw rows returned by the API (``None`` until ``fetch_data``).
        df: Processed ``DataFrame`` (``None`` until ``filter_and_normalize``).
        board: Board name of the most recent successful fetch; used in titles.
        seasons: Mapping of season name to ``(start, end)`` ``YYYY-MM-DD``
            strings, both bounds inclusive in the date comparisons below.
    """

    # Line colours per season; seasons missing here fall back to the default.
    SEASON_COLORS = {
        "Season 1": "blue",
        "Season 2": "orange",
        "Season 3": "green",
        "Season 4": "red",
        "Bloodgames-Bitchtank": "purple",
    }
    DEFAULT_SEASON_COLOR = "gray"

    def __init__(self):
        self.data = None
        self.df = None
        self.board = "tv"
        self.seasons = {
            "Season 1": ("2023-04-18", "2023-05-31"),
            "Season 2": ("2023-12-18", "2024-01-28"),
            "Season 3": ("2024-10-27", "2024-12-08"),
            "Season 4": ("2025-06-13", "2025-06-28"),
            "Bloodgames-Bitchtank": ("2024-06-24", "2024-08-02"),
        }

    # ------------------------------------------------------------------ helpers

    def _require_frame(self):
        """Raise ``ValueError`` unless ``filter_and_normalize`` has run."""
        if self.df is None:
            raise ValueError(
                "No processed data available. Run filter_and_normalize() first."
            )

    def _season_ranges(self):
        """Return ``{name: (start, end)}`` with season bounds parsed to datetimes."""
        return {
            name: (
                datetime.strptime(start, "%Y-%m-%d"),
                datetime.strptime(end, "%Y-%m-%d"),
            )
            for name, (start, end) in self.seasons.items()
        }

    def _season_frame(self, start_date, end_date):
        """Return the rows of ``self.df`` whose ``Date`` lies in [start, end]."""
        in_season = (self.df["Date"] >= start_date) & (self.df["Date"] <= end_date)
        return self.df[in_season]

    def _season_color(self, season_name):
        """Return the plot colour for a season, tolerating unknown names."""
        return self.SEASON_COLORS.get(season_name, self.DEFAULT_SEASON_COLOR)

    # --------------------------------------------------------------- data stage

    def fetch_data(self, board="tv"):
        """Fetch data from 4stats.io API.

        Args:
            board: Board name interpolated into the history URL.

        Returns:
            ``True`` when the response was downloaded and parsed into
            ``self.data``; ``False`` when the request or JSON decoding failed
            (the error is printed, not raised).
        """
        print(f"📡 Fetching data from 4stats.io for /{board}/...")

        # Accept-Encoding is deliberately NOT overridden: requests advertises
        # only the encodings it can actually decode, whereas forcing
        # "br, zstd" can yield a body that cannot be decompressed when the
        # optional brotli/zstandard packages are absent.
        headers = {
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:139.0) Gecko/20100101 Firefox/139.0",
            "Accept": "application/json, text/plain, */*",
            "Origin": "https://4stats.io",
            "DNT": "1",
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Cache-Control": "no-cache",
        }

        try:
            response = requests.get(
                f"https://api.4stats.io/history/day/{board}",
                headers=headers,
                timeout=30,
            )
            response.raise_for_status()
            payload = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers JSON decode failures on requests versions
            # whose JSONDecodeError is not a RequestException.
            print(f"❌ Failed to fetch data: {e}")
            return False

        self.data = payload
        self.board = board
        print(f"✅ Downloaded {len(self.data)} records.")
        return True

    def filter_and_normalize(self, start_year=2023):
        """Filter data from specified year and normalize posts per day.

        Each raw row is indexed positionally: element 0 is used as a
        millisecond epoch timestamp, element 2 as posts per day, and elements
        1 and 3 are carried through unchanged.

        Args:
            start_year: Rows before 1 January of this year (UTC) are dropped.

        Returns:
            The processed ``DataFrame`` (also stored on ``self.df``) with
            columns ``Timestamp``, ``Column_2``, ``Posts_Per_Day``,
            ``Column_4`` and ``Date``.

        Raises:
            ValueError: If no data was fetched, or nothing survives cleaning.
        """
        if not self.data:
            raise ValueError("No data available. Run fetch_data() first.")

        # The frame decodes timestamps as UTC (pd.to_datetime, unit="ms"), so
        # the cutoff must be UTC too; a naive datetime would use local time.
        cutoff = datetime(start_year, 1, 1, tzinfo=timezone.utc).timestamp() * 1000

        # Rows without a timestamp must be discarded *before* the comparison:
        # ``None >= cutoff`` raises TypeError.
        dated_rows = [row for row in self.data if row[0] is not None]
        filtered_data = [row for row in dated_rows if row[0] >= cutoff]
        print(f"✂️ Trimmed to {len(filtered_data)} records from {start_year} onward.")

        clean_data = [row for row in filtered_data if row[2] is not None]

        removed = (len(self.data) - len(dated_rows)) + (
            len(filtered_data) - len(clean_data)
        )
        if removed:
            print(f"🧹 Removed {removed} rows with missing data.")

        if not clean_data:
            raise ValueError("No valid data remaining after cleaning.")

        timestamps = np.array([row[0] for row in clean_data], dtype=float)
        posts_per_day = np.array([row[2] for row in clean_data], dtype=float)

        if timestamps.max() > timestamps.min():
            # Remove the long-term linear trend but keep the series centred on
            # its original mean so values stay on a posts-per-day scale.
            fit = linregress(timestamps, posts_per_day)
            trendline = fit.slope * timestamps + fit.intercept
            adjusted_posts_per_day = posts_per_day - (trendline - posts_per_day.mean())

            print(f"📉 Detrended data - R²: {fit.rvalue**2:.4f}, p-value: {fit.pvalue:.4e}")
            # Slope is per millisecond; 86_400_000 ms per day converts it.
            print(f"📈 Trend slope: {fit.slope*86400000:.2f} posts/day per day")
        else:
            # linregress cannot fit when every x value is identical (for
            # example a single surviving row); leave the series untouched.
            adjusted_posts_per_day = posts_per_day.copy()
            print("⚠️ Not enough distinct timestamps to detrend; using raw values.")

        adjusted_data = [
            [row[0], row[1], float(round(adj, 2)), row[3]]
            for row, adj in zip(clean_data, adjusted_posts_per_day)
        ]

        self.df = pd.DataFrame(
            adjusted_data,
            columns=["Timestamp", "Column_2", "Posts_Per_Day", "Column_4"],
        )
        self.df["Date"] = pd.to_datetime(self.df["Timestamp"], unit="ms")

        return self.df

    def get_season_stats(self):
        """Calculate statistics for each season.

        Returns:
            ``{season_name: {...}}`` with ``mean``, ``std``, ``max``, ``min``
            of ``Posts_Per_Day``, the row count as ``days`` and the parsed
            ``start_date``/``end_date``. Seasons with no rows are omitted.

        Raises:
            ValueError: If ``filter_and_normalize`` has not been run.
        """
        self._require_frame()

        stats = {}
        for season_name, (start_date, end_date) in self._season_ranges().items():
            season_df = self._season_frame(start_date, end_date)
            if season_df.empty:
                continue

            posts = season_df["Posts_Per_Day"]
            stats[season_name] = {
                "mean": posts.mean(),
                "std": posts.std(),
                "max": posts.max(),
                "min": posts.min(),
                "days": len(season_df),
                "start_date": start_date,
                "end_date": end_date,
            }

        return stats

    # --------------------------------------------------------------- plot stage

    def plot_seasons_comparison(self, max_days=42, figsize=(12, 8)):
        """Create comparison plot across seasons.

        Every season is drawn against a 1-based day index counted from its own
        start date so that seasons can be overlaid.

        Args:
            max_days: Highest day index plotted for any season.
            figsize: Figure size passed to matplotlib.

        Returns:
            The matplotlib figure.

        Raises:
            ValueError: If ``filter_and_normalize`` has not been run.
        """
        self._require_frame()

        print(f"📊 Creating comparison graph (max {max_days} days)...")

        fig = plt.figure(figsize=figsize)

        for season_name, (start_date, end_date) in self._season_ranges().items():
            # Copy: a column is added below and must not touch self.df.
            season_df = self._season_frame(start_date, end_date).copy()

            if season_df.empty:
                print(f"⚠️ No data found for {season_name}")
                continue

            season_df["Day_Index"] = (season_df["Date"] - start_date).dt.days + 1
            season_df = season_df[season_df["Day_Index"] <= max_days]

            plt.plot(
                season_df["Day_Index"],
                season_df["Posts_Per_Day"],
                label=f"{season_name} (n={len(season_df)})",
                color=self._season_color(season_name),
                linewidth=2,
                alpha=0.8,
            )

        plt.xlabel("Days from Season Start", fontsize=12)
        plt.ylabel("Normalized Posts Per Day", fontsize=12)
        plt.title(
            f"4chan /{self.board}/ Activity Across Seasons\n(Detrended and Normalized)",
            fontsize=14,
            pad=20,
        )
        plt.legend(loc='upper right')
        plt.grid(True, alpha=0.3)
        plt.tight_layout()

        return fig

    def plot_full_timeline(self, figsize=(15, 8)):
        """Plot full timeline with season highlights.

        Args:
            figsize: Figure size passed to matplotlib.

        Returns:
            The matplotlib figure.

        Raises:
            ValueError: If ``filter_and_normalize`` has not been run.
        """
        self._require_frame()

        fig = plt.figure(figsize=figsize)

        # Whole series as a muted backdrop; seasons are drawn on top of it.
        plt.plot(
            self.df["Date"],
            self.df["Posts_Per_Day"],
            color='lightgray',
            alpha=0.7,
            linewidth=1,
        )

        for season_name, (start_date, end_date) in self._season_ranges().items():
            season_df = self._season_frame(start_date, end_date)
            if season_df.empty:
                continue

            plt.plot(
                season_df["Date"],
                season_df["Posts_Per_Day"],
                color=self._season_color(season_name),
                linewidth=3,
                alpha=0.8,
                label=season_name,
            )

        plt.xlabel("Date", fontsize=12)
        plt.ylabel("Normalized Posts Per Day", fontsize=12)
        plt.title(
            f"4chan /{self.board}/ Activity Timeline with Season Highlights",
            fontsize=14,
            pad=20,
        )
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.xticks(rotation=45)
        plt.tight_layout()

        return fig

    # ------------------------------------------------------------------ report

    def print_summary(self):
        """Print summary statistics for every season that has data."""
        stats = self.get_season_stats()

        print("\n📊 SEASON SUMMARY STATISTICS")
        print("=" * 50)

        for season_name, season_stats in stats.items():
            first_day = season_stats['start_date'].strftime('%Y-%m-%d')
            last_day = season_stats['end_date'].strftime('%Y-%m-%d')

            print(f"\n{season_name}:")
            print(f" Duration: {season_stats['days']} days")
            print(f" Mean posts/day: {season_stats['mean']:.1f}")
            print(f" Std deviation: {season_stats['std']:.1f}")
            print(f" Range: {season_stats['min']:.1f} - {season_stats['max']:.1f}")
            print(f" Dates: {first_day} to {last_day}")


def main():
    """Main execution function: fetch, process, summarise, then show plots."""
    analyzer = FourChanAnalyzer()

    # Nothing to analyse if the download failed; fetch_data already reported it.
    if not analyzer.fetch_data():
        return

    analyzer.filter_and_normalize()
    analyzer.print_summary()

    # Each figure is shown before the next one is built.
    analyzer.plot_seasons_comparison()
    plt.show()

    analyzer.plot_full_timeline()
    plt.show()


if __name__ == "__main__":
    main()