Upload files to "/"
This commit is contained in:
@@ -0,0 +1,246 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import json
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import requests
|
||||
import matplotlib.pyplot as plt
|
||||
from datetime import datetime
|
||||
from scipy.stats import linregress
|
||||
|
||||
|
||||
class FourChanAnalyzer:
    """Analyze 4chan board activity around show "seasons" using 4stats.io data.

    Workflow: fetch_data() -> filter_and_normalize() -> get_season_stats() /
    print_summary() / plot_*().
    """

    # Fixed display color per season, shared by every plotting method.
    # (Previously duplicated as a local dict in each plot method.)
    SEASON_COLORS = {
        "Season 1": "blue",
        "Season 2": "orange",
        "Season 3": "green",
        "Season 4": "red",
        "Bloodgames-Bitchtank": "purple"
    }

    def __init__(self):
        # Raw API rows; each row looks like [timestamp_ms, ?, posts_per_day, ?]
        # (column meanings inferred from usage below — TODO confirm against API).
        self.data = None
        # Processed DataFrame, populated by filter_and_normalize().
        self.df = None
        # Inclusive season date ranges as ISO "YYYY-MM-DD" strings.
        self.seasons = {
            "Season 1": ("2023-04-18", "2023-05-31"),
            "Season 2": ("2023-12-18", "2024-01-28"),
            "Season 3": ("2024-10-27", "2024-12-08"),
            "Season 4": ("2025-06-13", "2025-06-28"),
            "Bloodgames-Bitchtank": ("2024-06-24", "2024-08-02")
        }

    def _season_ranges(self):
        """Return {season_name: (start_dt, end_dt)} parsed from self.seasons.

        Centralizes the strptime parsing that was previously copy-pasted into
        get_season_stats(), plot_seasons_comparison() and plot_full_timeline().
        """
        return {
            name: (datetime.strptime(start, "%Y-%m-%d"), datetime.strptime(end, "%Y-%m-%d"))
            for name, (start, end) in self.seasons.items()
        }

    def fetch_data(self, board="tv"):
        """Fetch per-day history for *board* from the 4stats.io API.

        Stores the decoded JSON list in self.data.
        Returns True on success, False on any request/HTTP failure.
        """
        print(f"📡 Fetching data from 4stats.io for /{board}/...")

        # Browser-like headers; the API is consumed by the 4stats.io frontend.
        headers = {
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:139.0) Gecko/20100101 Firefox/139.0",
            "Accept": "application/json, text/plain, */*",
            "Accept-Encoding": "gzip, deflate, br, zstd",
            "Origin": "https://4stats.io",
            "DNT": "1",
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Cache-Control": "no-cache"
        }

        try:
            response = requests.get(f"https://api.4stats.io/history/day/{board}",
                                    headers=headers, timeout=30)
            response.raise_for_status()
            self.data = response.json()
            print(f"✅ Downloaded {len(self.data)} records.")
            return True
        except requests.exceptions.RequestException as e:
            print(f"❌ Failed to fetch data: {e}")
            return False

    def filter_and_normalize(self, start_year=2023):
        """Filter data from *start_year* onward and detrend posts-per-day.

        Removes rows with missing timestamp/value, fits a linear trend over
        time and subtracts it (re-centered on the mean), then builds self.df
        with columns Timestamp, Column_2, Posts_Per_Day, Column_4, Date.

        Raises ValueError if no data was fetched or nothing survives cleaning.
        Returns the resulting DataFrame.
        """
        if not self.data:
            raise ValueError("No data available. Run fetch_data() first.")

        # Timestamps are epoch milliseconds, hence the *1000.
        cutoff = datetime(start_year, 1, 1).timestamp() * 1000
        filtered_data = [row for row in self.data if row[0] >= cutoff]
        print(f"✂️ Trimmed to {len(filtered_data)} records from {start_year} onward.")

        # Drop rows missing the timestamp (col 0) or posts/day (col 2).
        clean_data = [row for row in filtered_data
                      if row[0] is not None and row[2] is not None]

        if len(clean_data) != len(filtered_data):
            print(f"🧹 Removed {len(filtered_data) - len(clean_data)} rows with missing data.")

        if len(clean_data) == 0:
            raise ValueError("No valid data remaining after cleaning.")

        timestamps = np.array([row[0] for row in clean_data])
        posts_per_day = np.array([row[2] for row in clean_data], dtype=float)

        # Remove the long-term trend but keep the series centered on its
        # original mean: adjusted = raw - (trendline - mean).
        slope, intercept, r_value, p_value, std_err = linregress(timestamps, posts_per_day)
        trendline = slope * timestamps + intercept
        mean_value = posts_per_day.mean()
        adjusted_posts_per_day = posts_per_day - (trendline - mean_value)

        print(f"📉 Detrended data - R²: {r_value**2:.4f}, p-value: {p_value:.4e}")
        # slope is posts/day per millisecond; 86400000 ms = 1 day.
        print(f"📈 Trend slope: {slope*86400000:.2f} posts/day per day")

        # Rebuild rows with the detrended value in column 2.
        adjusted_data = [
            [row[0], row[1], float(round(adj, 2)), row[3]]
            for row, adj in zip(clean_data, adjusted_posts_per_day)
        ]

        self.df = pd.DataFrame(adjusted_data, columns=["Timestamp", "Column_2", "Posts_Per_Day", "Column_4"])
        self.df["Date"] = pd.to_datetime(self.df["Timestamp"], unit="ms")

        return self.df

    def get_season_stats(self):
        """Return {season_name: stats_dict} for seasons with any data.

        Each stats dict has mean/std/max/min of Posts_Per_Day, the number of
        days of data, and the season's start/end datetimes.

        Raises ValueError if filter_and_normalize() has not been run.
        """
        if self.df is None:
            raise ValueError("No processed data available. Run filter_and_normalize() first.")

        stats = {}
        for season_name, (start_date, end_date) in self._season_ranges().items():
            # Inclusive date window for the season.
            season_df = self.df[(self.df["Date"] >= start_date) & (self.df["Date"] <= end_date)]

            if len(season_df) > 0:
                stats[season_name] = {
                    "mean": season_df["Posts_Per_Day"].mean(),
                    "std": season_df["Posts_Per_Day"].std(),
                    "max": season_df["Posts_Per_Day"].max(),
                    "min": season_df["Posts_Per_Day"].min(),
                    "days": len(season_df),
                    "start_date": start_date,
                    "end_date": end_date
                }

        return stats

    def plot_seasons_comparison(self, max_days=42, figsize=(12, 8)):
        """Plot each season on a common "days from season start" axis.

        Only the first *max_days* days of each season are shown.
        Returns the matplotlib Figure.
        Raises ValueError if filter_and_normalize() has not been run.
        """
        if self.df is None:
            raise ValueError("No processed data available. Run filter_and_normalize() first.")

        print(f"📊 Creating comparison graph (max {max_days} days)...")

        plt.figure(figsize=figsize)

        for season_name, (start_date, end_date) in self._season_ranges().items():
            # .copy() so the Day_Index assignment below doesn't warn/mutate self.df.
            season_df = self.df[(self.df["Date"] >= start_date) & (self.df["Date"] <= end_date)].copy()

            if len(season_df) == 0:
                print(f"⚠️ No data found for {season_name}")
                continue

            # Day 1 = season start; clip to the comparison window.
            season_df["Day_Index"] = (season_df["Date"] - start_date).dt.days + 1
            season_df = season_df[season_df["Day_Index"] <= max_days]

            plt.plot(season_df["Day_Index"], season_df["Posts_Per_Day"],
                     label=f"{season_name} (n={len(season_df)})",
                     color=self.SEASON_COLORS[season_name], linewidth=2, alpha=0.8)

        plt.xlabel("Days from Season Start", fontsize=12)
        plt.ylabel("Normalized Posts Per Day", fontsize=12)
        plt.title("4chan /tv/ Activity Across Seasons\n(Detrended and Normalized)", fontsize=14, pad=20)
        plt.legend(loc='upper right')
        plt.grid(True, alpha=0.3)
        plt.tight_layout()

        return plt.gcf()

    def plot_full_timeline(self, figsize=(15, 8)):
        """Plot the full timeline in gray with seasons highlighted in color.

        Returns the matplotlib Figure.
        Raises ValueError if filter_and_normalize() has not been run.
        """
        if self.df is None:
            raise ValueError("No processed data available. Run filter_and_normalize() first.")

        plt.figure(figsize=figsize)

        # Faint background line for the whole series.
        plt.plot(self.df["Date"], self.df["Posts_Per_Day"],
                 color='lightgray', alpha=0.7, linewidth=1)

        # Overlay each season in its assigned color.
        for season_name, (start_date, end_date) in self._season_ranges().items():
            season_df = self.df[(self.df["Date"] >= start_date) & (self.df["Date"] <= end_date)]

            if len(season_df) > 0:
                plt.plot(season_df["Date"], season_df["Posts_Per_Day"],
                         color=self.SEASON_COLORS[season_name], linewidth=3, alpha=0.8, label=season_name)

        plt.xlabel("Date", fontsize=12)
        plt.ylabel("Normalized Posts Per Day", fontsize=12)
        plt.title("4chan /tv/ Activity Timeline with Season Highlights", fontsize=14, pad=20)
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.xticks(rotation=45)
        plt.tight_layout()

        return plt.gcf()

    def print_summary(self):
        """Print per-season summary statistics to stdout."""
        stats = self.get_season_stats()

        print("\n📊 SEASON SUMMARY STATISTICS")
        print("=" * 50)

        for season_name, season_stats in stats.items():
            print(f"\n{season_name}:")
            print(f"  Duration: {season_stats['days']} days")
            print(f"  Mean posts/day: {season_stats['mean']:.1f}")
            print(f"  Std deviation: {season_stats['std']:.1f}")
            print(f"  Range: {season_stats['min']:.1f} - {season_stats['max']:.1f}")
            print(f"  Dates: {season_stats['start_date'].strftime('%Y-%m-%d')} to {season_stats['end_date'].strftime('%Y-%m-%d')}")
|
||||
|
||||
def main():
    """Entry point: download, process, summarize, then show both plots."""
    analyzer = FourChanAnalyzer()

    # Abort early when the download fails; fetch_data() already reported why.
    if not analyzer.fetch_data():
        return

    analyzer.filter_and_normalize()
    analyzer.print_summary()

    # Render each visualization in turn.
    for build_figure in (analyzer.plot_seasons_comparison, analyzer.plot_full_timeline):
        build_figure()
        plt.show()


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user