#!/bin/env python3
import json
from datetime import datetime, timezone

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from scipy.stats import linregress

# === Configuration ===

# Daily history endpoint queried by this script (4chan /tv/ statistics).
API_URL = "https://api.4stats.io/history/day/tv"

# Browser-like request headers.  Accept-Encoding is deliberately not set:
# requests advertises only the encodings it can actually decode, whereas
# hard-coding "br, zstd" risks receiving a compressed body that cannot be
# parsed when the optional brotli/zstandard packages are not installed.
REQUEST_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (X11; Linux x86_64; rv:139.0) "
        "Gecko/20100101 Firefox/139.0"
    ),
    "Accept": "application/json, text/plain, */*",
    "Origin": "https://4stats.io",
    "DNT": "1",
    "Connection": "keep-alive",
    "Pragma": "no-cache",
    "Cache-Control": "no-cache",
}

# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT_S = 30

# Rows older than this instant are discarded.  The cutoff is pinned to UTC so
# it does not shift with the local timezone of the machine running the script
# and matches pd.to_datetime(..., unit="ms"), which also counts from the UTC
# epoch.
CUTOFF = datetime(2023, 1, 1, tzinfo=timezone.utc)
CUTOFF_MS = CUTOFF.timestamp() * 1000  # in ms

# Only the first MAX_SEASON_DAYS days of each season are plotted.
MAX_SEASON_DAYS = 42

# Inclusive (start, end) dates of each season, as YYYY-MM-DD strings.
SEASONS = {
    "Season 1": ("2023-04-18", "2023-05-30"),
    "Season 2": ("2023-12-18", "2024-01-28"),
    "Season 3": ("2024-10-27", "2024-12-07"),
    "Season 4": ("2025-06-13", "2025-06-30"),
    "Bloodgames-Bitchtank": ("2024-06-24", "2024-08-08"),
    "Season 5": ("2026-03-15", "2026-04-13"),
}

# Same ranges parsed into naive datetimes (both bounds at midnight).
SEASON_RANGES = {
    name: (
        datetime.strptime(start, "%Y-%m-%d"),
        datetime.strptime(end, "%Y-%m-%d"),
    )
    for name, (start, end) in SEASONS.items()
}

# Matplotlib line styles, one per plotted season (reused cyclically).
LINE_STYLES = ["-", "--", "-.", ":", (0, (3, 1, 1, 1)), (0, (5, 1))]


def fetch_history(url=API_URL, timeout=REQUEST_TIMEOUT_S):
    """Download and decode the JSON history served at *url*.

    Raises:
        RuntimeError: if the server answers with a status other than 200.
        requests.RequestException: on connection problems or timeout.
    """
    response = requests.get(url, headers=REQUEST_HEADERS, timeout=timeout)
    if response.status_code != 200:
        raise RuntimeError(
            f"Failed to fetch data. Status code: {response.status_code}"
        )
    return response.json()


def rows_since(rows, cutoff_ms):
    """Return the rows whose first column (epoch ms) is >= *cutoff_ms*."""
    return [row for row in rows if row[0] >= cutoff_ms]


def drop_missing_posts(rows):
    """Return the rows whose "Posts Per Day" value (third column) is set."""
    return [row for row in rows if row[2] is not None]


def detrend(rows):
    """Remove the linear trend from the "Posts Per Day" column of *rows*.

    A least-squares line is fitted to posts-per-day against the timestamp and
    subtracted, then the series mean is added back so the values keep their
    original level.  Each returned row is
    ``[row[0], row[1], adjusted value rounded to 2 decimals, row[3]]``.

    With fewer than two distinct timestamps no line can be fitted
    (``linregress`` raises), so the values are passed through unchanged
    apart from the rounding.
    """
    if not rows:
        return []

    timestamps = np.array([row[0] for row in rows], dtype=float)
    posts_per_day = np.array([row[2] for row in rows], dtype=float)

    if np.unique(timestamps).size < 2:
        adjusted = posts_per_day
    else:
        fit = linregress(timestamps, posts_per_day)
        trendline = fit.slope * timestamps + fit.intercept
        adjusted = posts_per_day - (trendline - posts_per_day.mean())

    return [
        [row[0], row[1], float(round(adj, 2)), row[3]]
        for row, adj in zip(rows, adjusted)
    ]


def build_frame(rows):
    """Build the plotting DataFrame from four-column *rows*.

    Adds a ``Date`` column converted from the millisecond ``Timestamp``.
    The meaning of the second and fourth columns is not used by this script,
    hence the generic ``Column_2`` / ``Column_4`` names.
    """
    df = pd.DataFrame(
        rows,
        columns=["Timestamp", "Column_2", "Posts_Per_Day", "Column_4"],
    )
    df["Date"] = pd.to_datetime(df["Timestamp"], unit="ms")
    return df


def season_slice(df, start_date, end_date, max_days=MAX_SEASON_DAYS):
    """Return the rows of *df* dated within [start_date, end_date].

    A 1-based ``Day_Index`` column (days since *start_date*) is added and
    rows beyond *max_days* are dropped.

    NOTE(review): both bounds are midnights, so a row stamped later during
    the end date is excluded -- confirm the API stamps daily rows at 00:00.
    """
    in_season = (df["Date"] >= start_date) & (df["Date"] <= end_date)
    season_df = df[in_season].copy()
    season_df["Day_Index"] = (season_df["Date"] - start_date).dt.days + 1
    return season_df[season_df["Day_Index"] <= max_days]


def style_for(index):
    """Return the line style for the *index*-th season.

    Styles wrap around, so adding more seasons than styles cannot raise
    IndexError.
    """
    return LINE_STYLES[index % len(LINE_STYLES)]


def plot_seasons(df, season_ranges=SEASON_RANGES):
    """Plot posts per day against season day, one line per season, and show it."""
    plt.figure(figsize=(10, 6))
    for i, (season_name, (start_date, end_date)) in enumerate(
        season_ranges.items()
    ):
        season_df = season_slice(df, start_date, end_date)
        plt.plot(
            season_df["Day_Index"],
            season_df["Posts_Per_Day"],
            label=season_name,
            linestyle=style_for(i),
            linewidth=2,
        )

    plt.xlabel(f"Days (1 to {MAX_SEASON_DAYS})")
    plt.ylabel("Posts Per Day")
    plt.title("Posts Per Day Across Seasons (4chan /tv/)")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()


def main():
    """Fetch the history, detrend it and plot every season."""
    # === Step 1: Download JSON from 4stats.io ===
    print("📡 Fetching data from 4stats.io...")
    data = fetch_history()
    print(f"✅ Downloaded {len(data)} records.")

    # === Step 2: Filter from Jan 1, 2023 onward ===
    filtered_data = rows_since(data, CUTOFF_MS)
    print(f"✂️ Trimmed to {len(filtered_data)} records from 2023 onward.")

    # === Step 3: Normalize "Posts Per Day" (column 3) ===
    filtered_data = drop_missing_posts(filtered_data)
    print(f"🧹 Dropped None rows, {len(filtered_data)} records remaining.")
    adjusted_data = detrend(filtered_data)

    # === Step 4: Convert adjusted data into DataFrame ===
    df = build_frame(adjusted_data)

    # === Steps 5-6: Slice each season and plot the graph ===
    plot_seasons(df)


if __name__ == "__main__":
    main()