|
from urllib.parse import urlparse |
|
from shutil import copyfile |
|
|
|
import subprocess |
|
import datetime |
|
import argparse |
|
import sys |
|
import os |
|
import re |
|
|
|
def execute(*command, stdout=False, stderr=True):
    """Run *command* as a child process and return its exit status.

    Args:
        *command: Program followed by its arguments. The sequence is handed
            to ``subprocess.call`` as an argument list; no shell is involved.
        stdout: When true, the child's standard output is sent to
            ``sys.stdout``; otherwise it is discarded.
        stderr: When true, the child's standard error is sent to
            ``sys.stderr``; otherwise it is discarded.

    Returns:
        The child's return code as reported by ``subprocess.call``.
    """
    # subprocess.DEVNULL replaces a hand-opened os.devnull handle that was
    # never closed, so repeated calls no longer leak a file descriptor.
    out = sys.stdout if stdout else subprocess.DEVNULL
    err = sys.stderr if stderr else subprocess.DEVNULL
    return subprocess.call(command, stdout=out, stderr=err)
|
|
|
def download(httrack, depth, location, cache, url, *args):
    """Build the HTTrack command line for *url* and run it via ``execute``.

    Args:
        httrack: Program to launch (first element of the command).
        depth: Value appended to ``--depth=`` after conversion with ``str``.
        location: First half of the ``-O`` value.
        cache: Second half of the ``-O`` value, joined to *location* by a comma.
        url: First URL placed on the command line.
        *args: Extra arguments appended verbatim at the end of the command.

    Returns:
        Whatever ``execute`` returns for the assembled command.
    """
    output_spec = location + ',' + cache
    depth_flag = '--depth=' + str(depth)

    fixed_options = [
        '--display=2',
        '--timeout=60',
        '--retries=99',
        '--sockets=7',
        '--connection-per-second=5',
        '--max-rate=250000',
        '--keep-alive',
        '--mirror',
        '--clean',
        '--robots=0',
    ]

    # NOTE(review): this value carries literal single quotes and a shell-style
    # "$(...)" substitution. If the command is launched without a shell (an
    # argument list suggests so), the text is sent exactly as written --
    # confirm whether that is the intended user agent.
    user_agent = '\'$(httrack --version); ZeroArchive ()\''

    cmd = [httrack, url, '-O', output_spec, '-I0', depth_flag]
    cmd.extend(fixed_options)
    cmd.extend(['--user-agent', user_agent])
    cmd.extend(args)

    return execute(*cmd)
|
|
|
def _archived_page_path(url):
    """Return ``(hostname, path)`` of the local file expected to hold *url*.

    The hostname is the URL's network location with ``:`` replaced by ``_``.
    The path is the URL path without its leading slash, with ``.html``
    appended unless it already ends in ``.html``/``.htm``.
    """
    parsed = urlparse(url)
    hostname = parsed.netloc.replace(':', '_')
    path = parsed.path[1:]
    if not path or path.endswith('/'):
        # Root and directory-style URLs ("http://host/", "/docs/") must map
        # to that directory's index.html; blindly appending ".html" produced
        # ".html" / "docs/.html", which is not a page file in the mirror.
        path += 'index.html'
    elif not path.lower().endswith(('.html', '.htm')):
        path += '.html'
    return hostname, path


def _page_title(page_path, default='Website'):
    """Return the text of the first ``<title>`` element found in *page_path*.

    The file is scanned line by line, so only titles that open and close on
    the same line are recognised. *default* is returned when none is found.
    """
    pattern = re.compile(r'<title>(.*?)</title>', re.IGNORECASE)
    # errors='replace' keeps a page that is not valid UTF-8 from aborting the
    # whole run just because its title could not be decoded cleanly.
    with open(page_path, encoding='utf8', errors='replace') as page:
        for line in page:
            found = pattern.search(line)
            if found:
                return found.group(1)
    return default


def _archive_items(csv_path):
    """Return ``<li>`` links for the rows of *csv_path*, newest row first.

    Each row is ``<timestamp>,<href>``. Rows without a comma or whose first
    field is not a ``%Y%m%d%H%M%S`` timestamp are skipped.
    """
    with open(csv_path) as listing:
        rows = listing.readlines()

    items = []
    for row in reversed(rows):
        # Split on the first comma only so an href containing commas survives.
        stamp, separator, href = row.strip().partition(',')
        if not separator:
            continue
        try:
            moment = datetime.datetime.strptime(stamp, '%Y%m%d%H%M%S')
        except ValueError:
            continue
        label = moment.strftime('%Y-%m-%d %H:%M:%S')
        items.append('<li><a href="' + href + '">' + label + '</a></li>')
    return items


def navigation(zite, archive, url):
    """Regenerate ``index.html`` in *zite* so it lists every recorded archive.

    Steps, in order:
      1. Copy ``template.html`` (from the current working directory) to
         ``<zite>/index.html``.
      2. Append ``<archive name>,<href>`` for this run to
         ``<zite>/archives.csv``.
      3. Read the page title from the archived copy of *url*.
      4. Replace ``%SITE-ORIGINAL%``, ``%SITE-TITLE%`` and ``%SITE-ARCHIVES%``
         in the index page and write it back.

    Args:
        zite: Directory that receives ``index.html`` and ``archives.csv``.
        archive: Directory of the snapshot just downloaded; its base name is
            used as the archive name.
        url: Original URL of the archived page.

    Raises:
        OSError: If the template, the archived page or the index file cannot
            be opened.
    """
    index_path = os.path.join(zite, 'index.html')
    csv_path = os.path.join(zite, 'archives.csv')

    copyfile('template.html', index_path)

    hostname, path = _archived_page_path(url)

    name = os.path.basename(os.path.normpath(archive))
    # Links always use forward slashes, whatever the local path separator is.
    href = os.path.normpath(os.path.join(name, hostname, path)).replace('\\', '/')
    with open(csv_path, 'a') as archives:
        archives.write(name + ',' + href + '\n')

    title = _page_title(os.path.join(archive, hostname, path))

    with open(index_path, 'r', encoding='utf8') as file:
        index = file.read()

    index = index.replace('%SITE-ORIGINAL%', url)
    index = index.replace('%SITE-TITLE%', title)
    index = index.replace(
        '%SITE-ARCHIVES%',
        '\n' + '\n'.join(_archive_items(csv_path)) + '\n',
    )

    with open(index_path, 'w', encoding='utf8') as file:
        file.write(index)
|
|
|
def sign(zeronet, address, privkey):
    """Run *zeronet* with the ``siteSign`` command for *address*.

    Args:
        zeronet: Program to launch.
        address: Passed as the argument following ``siteSign``.
        privkey: Passed as the final argument.

    Returns:
        Whatever ``execute`` returns for the command.
    """
    # NOTE(review): the private key travels as a plain command-line argument,
    # which may expose it to other local users through process listings.
    command = (zeronet, 'siteSign', address, privkey)
    return execute(*command)
|
|
|
def publish(zeronet, address):
    """Run *zeronet* with the ``sitePublish`` command for *address*.

    Args:
        zeronet: Program to launch.
        address: Passed as the argument following ``sitePublish``.

    Returns:
        Whatever ``execute`` returns for the command.
    """
    command = (zeronet, 'sitePublish', address)
    return execute(*command)
|
|
|
def main():
    """Command-line entry point: download, index, sign and publish a site.

    Parses the command line, mirrors the requested website(s) into a
    timestamped directory under ``--zite``, rebuilds the navigation page for
    the first URL, then signs and publishes the ZeroNet site.

    Exits with status 1 when invoked without arguments (after printing the
    help text to stderr) or when the download, sign or publish step reports a
    non-zero status.
    """
    parser = argparse.ArgumentParser(
        prog=__package__,
        description='Download website and publish it to ZeroNet'
    )

    parser.add_argument('website', type=str, nargs='+', help='website URLs to download')
    parser.add_argument('--depth', type=int, default=5, help='depth to clone')

    parser.add_argument('--zite', required=True, help='path to ZeroNet site')
    parser.add_argument('--address', required=True, help='address of ZeroNet site')
    parser.add_argument('--privkey', required=True, help='private key of ZeroNet site')

    parser.add_argument('--zeronet', required=True, help='path to ZeroNet installation')
    parser.add_argument('--httrack', required=True, help='path to HTTrack installation')

    # Show the full help rather than argparse's terse "required" error when
    # the program is started with no arguments at all.
    if len(sys.argv) == 1:
        parser.print_help(sys.stderr)
        sys.exit(1)

    args = parser.parse_args()

    # Aware UTC "now" formats to the same string as the deprecated utcnow().
    timestamp = datetime.datetime.now(datetime.timezone.utc).strftime('%Y%m%d%H%M%S')
    archive = os.path.join(args.zite, timestamp)

    print('Downloading website')
    st = download(args.httrack, args.depth, archive, os.path.join(args.zite, 'cache'), *args.website)
    if st:
        print('Error while downloading website', file=sys.stderr)
        sys.exit(1)

    print('Creating navigation page')
    navigation(args.zite, archive, args.website[0])

    print('Signing site')
    st = sign(args.zeronet, args.address, args.privkey)
    if st:
        print('Error while signing site', file=sys.stderr)
        sys.exit(1)

    print('Publishing site')
    # This step previously called sign() a second time, so the site was
    # signed twice and never actually published.
    st = publish(args.zeronet, args.address)
    if st:
        print('Error while publishing site', file=sys.stderr)
        sys.exit(1)

    print('Done')
|
|
|
# Run only when executed as a script (or via ``python -m``), so importing
# this module does not start a download as a side effect.
if __name__ == '__main__':
    main()