# 4chanFishtankDowntrend/4stat2.py

#!/bin/env python3
import json
import numpy as np
import pandas as pd
import requests
import matplotlib.pyplot as plt
from datetime import datetime
from scipy.stats import linregress

# === Step 1: Download JSON from 4stats.io ===
# Requests the per-day history for /tv/ and decodes the body as JSON into
# `data`. Any non-200 status aborts the script.
print("📡 Fetching data from 4stats.io...")
response = requests.get(
    "https://api.4stats.io/history/day/tv",
    headers={
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:139.0) Gecko/20100101 Firefox/139.0",
        "Accept": "application/json, text/plain, */*",
        # Accept-Encoding is deliberately not overridden: requests advertises
        # only the encodings it can decode in this environment. Claiming
        # "br, zstd" without the optional brotli/zstandard packages installed
        # can yield a body that response.json() cannot parse.
        "Origin": "https://4stats.io",
        "DNT": "1",
        "Connection": "keep-alive",
        "Pragma": "no-cache",
        "Cache-Control": "no-cache",
    },
    # Without a timeout, requests waits indefinitely on a stalled server.
    timeout=30,
)
if response.status_code != 200:
    raise Exception(f"Failed to fetch data. Status code: {response.status_code}")
data = response.json()
print(f"✅ Downloaded {len(data)} records.")

# === Step 2: Filter from Jan 1, 2023 onward ===
# row[0] is handled as epoch milliseconds (Step 4 parses it with unit="ms"),
# so the cutoff is pinned to midnight UTC. A naive datetime(...).timestamp()
# would be interpreted in the machine's local time zone and move the cutoff
# by the local UTC offset, making the kept record set host-dependent.
cutoff = pd.Timestamp("2023-01-01", tz="UTC").timestamp() * 1000  # in ms
filtered_data = [row for row in data if row[0] >= cutoff]
print(f"✂️ Trimmed to {len(filtered_data)} records from 2023 onward.")

# === Step 3: Normalize "Posts Per Day" (column 3) ===
# Records whose posts-per-day entry (index 2) is None are discarded before
# the regression.
filtered_data = list(filter(lambda row: row[2] is not None, filtered_data))  # drop None values
print(f"🧹 Dropped None rows, {len(filtered_data)} records remaining.")

timestamps = np.array([row[0] for row in filtered_data])
posts_per_day = np.asarray([row[2] for row in filtered_data], dtype=float)

# Fit a straight line through (timestamp, posts per day).
fit = linregress(timestamps, posts_per_day)
slope = fit.slope
intercept = fit.intercept
trendline = slope * timestamps + intercept

# Subtract the trend's deviation from the series mean, so the adjusted series
# keeps the same overall level while the linear drift is removed.
mean_value = np.mean(posts_per_day)
trend_offset = trendline - mean_value
adjusted_posts_per_day = posts_per_day - trend_offset

# Rebuild each record with the rounded, adjusted value in position 2.
adjusted_data = []
for idx, row in enumerate(filtered_data):
    adjusted_value = float(round(adjusted_posts_per_day[idx], 2))
    adjusted_data.append([row[0], row[1], adjusted_value, row[3]])

# === Step 4: Convert adjusted data into DataFrame ===
column_names = ["Timestamp", "Column_2", "Posts_Per_Day", "Column_4"]
df = pd.DataFrame(data=adjusted_data, columns=column_names)
# Interpret the Timestamp column as milliseconds since the epoch and append
# the parsed values as the last column.
parsed_dates = pd.to_datetime(df["Timestamp"], unit="ms")
df.insert(len(df.columns), "Date", parsed_dates)

# === Step 5: Define Season Ranges ===
# Insertion order matters: the plotting step walks this mapping in order and
# assigns line styles by position.
seasons = {
    "Season 1": ("2023-04-18", "2023-05-30"),
    "Season 2": ("2023-12-18", "2024-01-28"),
    "Season 3": ("2024-10-27", "2024-12-07"),
    "Season 4": ("2025-06-13", "2025-06-30"),
    "Bloodgames-Bitchtank": ("2024-06-24", "2024-08-08"),
    "Season 5": ("2026-03-15", "2026-04-13"),
}


def _parse_day(text):
    """Parse a YYYY-MM-DD string into a naive datetime at midnight."""
    return datetime.strptime(text, "%Y-%m-%d")


# Same keys as `seasons`, with each (start, end) pair parsed into datetimes.
season_ranges = {
    label: (_parse_day(first_day), _parse_day(last_day))
    for label, (first_day, last_day) in seasons.items()
}

# === Step 6: Plot the Graph ===
# Draws one line per season on a shared axis. The x value is the day number
# within the season (the start date is day 1), so seasons from different
# calendar periods overlay each other.
styles = ['-', '--', '-.', ':', (0, (3, 1, 1, 1)), (0, (5, 1))]
max_days = 42  # only the first max_days days of each season are drawn
plt.figure(figsize=(10, 6))
for i, (season_name, (start_date, end_date)) in enumerate(season_ranges.items()):
    # NOTE(review): end_date is midnight of the final day, so records stamped
    # later on that day are excluded by the <= comparison — confirm the feed's
    # timestamps fall on midnight if the last day should be included.
    season_df = df[(df["Date"] >= start_date) & (df["Date"] <= end_date)].copy()
    season_df["Day_Index"] = (season_df["Date"] - start_date).dt.days + 1
    season_df = season_df[season_df["Day_Index"] <= max_days]
    plt.plot(
        season_df["Day_Index"],
        season_df["Posts_Per_Day"],
        label=season_name,
        # Cycle through the styles so adding more seasons than styles reuses
        # a style instead of raising IndexError.
        linestyle=styles[i % len(styles)],
        linewidth=2,
    )


plt.xlabel(f"Days (1 to {max_days})")
plt.ylabel("Posts Per Day")
plt.title("Posts Per Day Across Seasons (4chan /tv/)")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()