Last active
May 9, 2022 19:38
-
-
Save SolomidHero/5c31b076ad73dde20f342a8c7f952a10 to your computer and use it in GitHub Desktop.
Logging to tensorboard
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime
import logging
import subprocess
import time
from pathlib import PosixPath

from torch.utils.tensorboard import SummaryWriter
# Interval between two consecutive scans of the bucket.
check_frequency = datetime.timedelta(minutes=5)
# Root S3 location that is scanned; replace the placeholders before running.
aws_url = 's3://<bucket>/<path>'
# Prefix shared by every scalar tag written to TensorBoard.
log_key = 'Automatic log'
def aws_ls(url):
    """List the entries directly under an S3 location via ``aws s3 ls``.

    Args:
        url: S3 location as a string or a path object. A ``PosixPath``
            collapses ``s3://`` into ``s3:/``, so the scheme separator is
            rebuilt before the CLI is invoked.

    Yields:
        ``(None, name)`` for every ``PRE`` row of the listing and
        ``(modified, name)`` for every other row, where ``modified`` is a
        naive ``datetime`` parsed from the row's first two columns.

    Anything the CLI writes to stderr is logged as a warning; rows that do
    not have the expected number of columns are logged and skipped.
    """
    location = str(url)
    if location.startswith('s3:'):
        # Rebuild the scheme so that both 's3:/bucket' (PosixPath-mangled)
        # and 's3://bucket' (plain string) end up with exactly two slashes.
        location = 's3://' + location[3:].lstrip('/')
    if not location.endswith('/'):
        # The trailing slash makes the CLI list the contents of the prefix
        # rather than the prefix entry itself.
        location += '/'

    completed = subprocess.run(
        ['aws', 's3', 'ls', location],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    if completed.stderr:
        logging.warning(completed.stderr)

    for raw_line in completed.stdout.split('\n'):
        row = raw_line.strip()
        if not row:
            continue

        # Limit the number of splits so names containing spaces stay intact.
        prefix_parts = row.split(maxsplit=1)
        if prefix_parts[0] == 'PRE':
            if len(prefix_parts) == 2:
                yield (None, prefix_parts[1])
            else:
                logging.warning('Skipping malformed listing row: %r', raw_line)
            continue

        # Expected columns: date, time, size, name.
        fields = row.split(maxsplit=3)
        if len(fields) < 4:
            logging.warning('Skipping malformed listing row: %r', raw_line)
            continue
        modified = datetime.datetime.strptime(
            f'{fields[0]} {fields[1]}', "%Y-%m-%d %H:%M:%S"
        )
        yield (modified, fields[3])
def capture_nodes(root_url, tree=None):
    """Scan ``root / os_type / user_id / run_{} / hist_{}.zip`` for new nodes.

    Args:
        root_url: Root location to scan; converted to a ``PosixPath`` so the
            nested levels can be joined with ``/``.
        tree: Mapping from ``"{os_type}/{user_id}"`` to the set of
            ``(date, name)`` entries seen so far. It is updated in place; a
            fresh dict is created when ``None`` is passed.

    Returns:
        A ``(change_num, tree)`` tuple, where ``change_num`` is the number of
        entries added to ``tree`` by this scan.
    """
    root_url = PosixPath(root_url)
    if tree is None:
        tree = {}

    def child_prefixes(url):
        # aws_ls yields a None date for PRE rows; only those can be listed
        # further, so rows with a date are not descended into.
        return [name for date, name in aws_ls(url) if date is None]

    change_num = 0
    for os_type in child_prefixes(root_url):
        os_url = root_url / os_type
        for user_id in child_prefixes(os_url):
            user_url = os_url / user_id
            key = f"{os_type}/{user_id}"
            for run in child_prefixes(user_url):
                # Count by growth of the set so an entry that is already
                # known (or repeated) is never counted twice.
                known = tree.setdefault(key, set())
                size_before = len(known)
                known.update(aws_ls(user_url / run))
                change_num += len(known) - size_before
    return change_num, tree
if __name__ == "__main__":
    # Initial scan: establishes the baseline count and the tree of known nodes.
    total_nodes, tree = capture_nodes(aws_url)
    next_check = datetime.datetime.now() + check_frequency

    scalar_logger = SummaryWriter('autologs_monitoring/')
    try:
        i = 0
        scalar_logger.add_scalar(f'{log_key} pkgs', total_nodes, i)
        scalar_logger.add_scalar(f'{log_key} delta', 0, i)
        print(f'Starting: {total_nodes} nodes. Next check {next_check}')

        while True:
            # Sleep until the scheduled check; skip sleeping if the previous
            # scan overran the schedule.
            time_left = (next_check - datetime.datetime.now()).total_seconds()
            if time_left > 0:
                time.sleep(time_left)

            i += 1
            nodes_delta, tree = capture_nodes(aws_url, tree=tree)
            total_nodes += nodes_delta
            # Advance from the previous deadline rather than from "now" so
            # the schedule does not drift by the duration of each scan.
            next_check += check_frequency

            scalar_logger.add_scalar(f'{log_key} pkgs', total_nodes, i)
            scalar_logger.add_scalar(f'{log_key} delta', nodes_delta, i)
            print(f'{datetime.datetime.now()} Nodes: {total_nodes} (+{nodes_delta}). Next check {next_check}')
    finally:
        # Close the writer when the loop is interrupted (e.g. Ctrl-C) or a
        # scan raises, so pending events are written out.
        scalar_logger.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment