|
import datetime |
|
import shelve |
|
import json |
|
import operator |
|
|
|
import requests |
|
from bs4 import BeautifulSoup as bs |
|
import pygal |
|
|
|
|
|
class SnapshotNotAvailable(Exception):
    """Raised by request_wayback when the closest capture is not available."""
|
|
|
|
|
def request_wayback(t):
    """Find the Wayback Machine capture of rubattle.net closest to *t*.

    Args:
        t: A ``datetime.datetime``. Only its date part (``%Y%m%d``) is sent
            to the availability API.

    Returns:
        A ``(capture_time, capture_url)`` tuple: the capture timestamp parsed
        into a ``datetime.datetime`` and the archive URL of that capture.

    Raises:
        SnapshotNotAvailable: If the response contains no closest capture, or
            the closest capture is not flagged as available.
    """
    r = requests.get(
        "http://archive.org/wayback/available",
        params={
            "url": "http://rubattle.net",
            # NOTE(review): day precision only, although callers bisect down
            # to one hour - confirm whether a longer timestamp is wanted.
            "timestamp": t.strftime("%Y%m%d"),
        },
    )
    info = json.loads(r.text)

    # When nothing is archived the API answers with an empty
    # "archived_snapshots" object, so "closest" may be missing. Report that
    # as SnapshotNotAvailable instead of leaking a KeyError.
    snap = info.get("archived_snapshots", {}).get("closest")
    if not snap or not snap.get("available"):
        raise SnapshotNotAvailable

    return (
        datetime.datetime.strptime(snap["timestamp"], "%Y%m%d%H%M%S"),
        snap["url"],
    )
|
|
|
|
|
def request_users(url):
    """Yield ``(name, count)`` pairs scraped from the page at *url*.

    The page is fetched, the element with id ``serverStatusList`` is located,
    and its ``<span>`` and ``<a>`` descendants are paired in document order:
    the text of each ``<a>`` is the name, the text of the matching ``<span>``
    is converted to ``int`` and used as the count.

    Args:
        url: Address of the page to download.

    Yields:
        ``(str, int)`` tuples, one per paired ``<span>``/``<a>``.
    """
    r = requests.get(url)
    # Name the parser explicitly. Without it BeautifulSoup emits a warning and
    # uses whichever parser happens to be installed, so the parse tree could
    # differ between machines. "html.parser" ships with Python.
    phtml = bs(r.text, "html.parser")
    status = phtml.find(id="serverStatusList")

    # Calling a tag is BeautifulSoup shorthand for find_all() on that tag.
    for count, name in zip(status("span"), status("a")):
        yield name.text, int(count.text)
|
|
|
|
|
def request_and_store(t, db):
    """Fetch the capture closest to *t* and store its user counts in *db*.

    The capture time returned by request_wayback is formatted with the
    module-level ``datetime_format`` to form the storage key. If that key is
    already in *db*, nothing is downloaded or written.

    Args:
        t: Requested ``datetime.datetime``.
        db: Mapping-like store supporting ``in``, item assignment, ``len()``
            and ``sync()``.

    Returns:
        The capture time reported by request_wayback, which may differ
        from *t*.
    """
    snapshot_time, snapshot_url = request_wayback(t)
    key = snapshot_time.strftime(datetime_format)

    # Already stored: skip the page download entirely.
    if key in db:
        return snapshot_time

    online = tuple(request_users(snapshot_url))
    db[key] = online
    db.sync()
    print(len(db), snapshot_time, online)
    return snapshot_time
|
|
|
|
|
def find_all_times(left, right, db):
    """Bisect the interval [left, right], storing the capture near each midpoint.

    Every visited interval is printed. An interval is split only while half
    of its length is at least the module-level ``delta``. The midpoint is
    passed to request_and_store, and if the capture time it returns lies
    inside the interval, the parts on either side of the capture and the
    midpoint are visited next, left part first.

    Args:
        left: Lower bound (``datetime.datetime``).
        right: Upper bound (``datetime.datetime``).
        db: Store handed through to request_and_store.
    """
    # Explicit stack; the left part is pushed last so it is visited first.
    pending = [(left, right)]
    while pending:
        lo, hi = pending.pop()
        print("->", lo, hi)

        half_span = (hi - lo) / 2
        if half_span < delta:
            continue

        mid = lo + half_span
        found = request_and_store(mid, db)
        if not (lo <= found <= hi):
            continue

        pending.append((max(found, mid), hi))
        pending.append((lo, min(found, mid)))
|
|
|
|
|
# Script body: extend the stored captures up to the present, then plot the
# "Diablo II LOD" counts split into three time-of-day series.
with shelve.open("store.shelve") as db:
    # Module-level settings read by request_and_store and find_all_times.
    datetime_format = "%Y-%m-%d-%H"
    delta = datetime.timedelta(seconds=60 * 60)
    default_start = datetime.datetime(2010, 1, 1).strftime(datetime_format)

    # Resume from the greatest stored key, or from default_start when the
    # store is empty.
    newest_key = max(db, default=default_start)
    left = request_and_store(
        datetime.datetime.strptime(newest_key, datetime_format), db
    )
    right = request_and_store(datetime.datetime.now(), db)

    find_all_times(left, right, db)
    print("Found {} pages".format(len(db)))

    # NOTE(review): pygal.DateY appears to exist only in older pygal
    # releases - confirm against the installed version.
    plot = pygal.DateY(
        show_minor_x_labels=False,
        x_label_rotation=30,
        x_labels_major_count=7,
        x_label_format="%Y-%m-%d",
        legend_at_bottom=True,
        include_x_axis=True,
    )
    plot.title = "PG players online per hours interval"

    d2_tag = "Diablo II LOD"
    hour_shift = datetime.timedelta(hours=4)  # utc+4 moscow

    def get_d2_point(key):
        """Return (time parsed from *key*, stored count for d2_tag)."""
        stamp = datetime.datetime.strptime(key, datetime_format)
        return stamp, dict(db[key])[d2_tag]

    night, day, prime = [], [], []
    for stamp, players in sorted(get_d2_point(key) for key in db):
        shifted = stamp + hour_shift
        hour = shifted.hour
        # Hours 1-9 are night, 10-17 are day, everything else is prime.
        if hour < 1 or hour >= 18:
            bucket = prime
        elif hour < 10:
            bucket = night
        else:
            bucket = day
        bucket.append((shifted, players))

    series = (
        ("night [1; 10)", night),
        ("day [10; 18)", day),
        ("prime [18; 1)", prime),
    )
    for label, points in series:
        plot.add("Diablo II " + label, points)

    plot.render_to_file("PG players online.svg")