filips123 · February 7, 2021 08:47
diff --git a/README.md b/README.md
diff --git a/main.py b/main.py
 from urllib.parse import urlparse
 from shutil import copyfile

 import subprocess
 import datetime
 import argparse
 import sys
 import os
 import re

 def execute(*command, stdout=False, stderr=True):
    """Run *command* as a child process and return its exit status.

    Args:
        *command: Program and arguments, passed to ``subprocess.call`` as a
            sequence (no shell is involved).
        stdout: When truthy the child writes to ``sys.stdout``; otherwise its
            standard output is discarded.
        stderr: When truthy the child writes to ``sys.stderr``; otherwise its
            standard error is discarded.

    Returns:
        The integer return code reported by ``subprocess.call``.
    """
    # subprocess.DEVNULL lets subprocess open *and close* the null device
    # itself, so no file handle is leaked on every call.
    out = sys.stdout if stdout else subprocess.DEVNULL
    err = sys.stderr if stderr else subprocess.DEVNULL

    return subprocess.call(command, stdout=out, stderr=err)

 def download(httrack, depth, location, cache, url, *args):
    """Mirror *url* with HTTrack and return HTTrack's exit status.

    Args:
        httrack: Path of the HTTrack executable.
        depth: Mirroring depth, passed as ``--depth=<depth>``.
        location: First component of HTTrack's ``-O`` option.
        cache: Second component of HTTrack's ``-O`` option
            (``-O location,cache``).
        url: First URL to mirror.
        *args: Extra command-line arguments appended verbatim (``main``
            passes any additional website URLs here).

    Returns:
        The return code of the HTTrack process, as reported by ``execute``.
    """
    # The command is executed without a shell, so a "$(httrack --version)"
    # substitution (and surrounding quotes) would be sent to servers
    # literally. Ask the binary for its version banner instead.
    try:
        banner = subprocess.run(
            [httrack, '--version'],
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            universal_newlines=True,
        ).stdout.strip()
    except OSError:
        banner = ''

    agent = (banner.splitlines()[0] if banner else 'HTTrack') + '; ZeroArchive ()'

    cmd = [
        httrack, url, '-O', location + ',' + cache, '-I0', '--depth=' + str(depth),
        '--display=2', '--timeout=60', '--retries=99', '--sockets=7',
        '--connection-per-second=5', '--max-rate=250000', '--keep-alive',
        '--mirror', '--clean', '--robots=0',
        '--user-agent', agent,
        *args
    ]

    return execute(*cmd)

 def navigation(zite, archive, url):
    """Regenerate the site's landing page with a link to every archive.

    Copies ``template.html`` (resolved against the current working
    directory) to ``<zite>/index.html``, appends the new archive to
    ``<zite>/archives.csv`` and fills in the ``%SITE-ORIGINAL%``,
    ``%SITE-TITLE%`` and ``%SITE-ARCHIVES%`` placeholders.

    Args:
        zite: Directory of the ZeroNet site.
        archive: Directory of the mirror that was just downloaded; its base
            name is recorded as the archive name.
        url: Original website URL; used for the back-link and to locate the
            mirrored start page inside *archive*.
    """
    index_path = os.path.join(zite, 'index.html')
    csv_path = os.path.join(zite, 'archives.csv')

    copyfile('template.html', index_path)

    parsed = urlparse(url)
    hostname = parsed.netloc.replace(':', '_')

    # Map the URL path to the mirrored start page. Directory-like paths
    # ("", "/" or anything ending in "/") resolve to index.html; previously
    # "/" produced the non-existent file ".html".
    page = parsed.path[1:]
    if not page or page.endswith('/'):
        page += 'index.html'
    elif not page.lower().endswith(('.html', '.htm')):
        page += '.html'

    name = os.path.basename(os.path.normpath(archive))
    href = os.path.normpath(os.path.join(name, hostname, page)).replace('\\', '/')
    with open(csv_path, 'a') as archives:
        archives.write(name + ',' + href + '\n')

    # Use the first <title> found in the start page; fall back to a generic
    # title when the page is missing or has no single-line <title> element.
    title = 'Website'
    pattern = re.compile(r'<title>(.*)<\/title>')
    try:
        with open(os.path.join(archive, hostname, page), encoding='utf8', errors='replace') as source:
            for line in source:
                match = pattern.search(line)
                if match:
                    title = match.group(1)
                    break
    except FileNotFoundError:
        # The CSV row is already written; still produce a usable index page.
        pass

    with open(index_path, 'r', encoding='utf8') as file:
        index = file.read()

    index = index.replace('%SITE-ORIGINAL%', url)
    index = index.replace('%SITE-TITLE%', title)

    with open(csv_path) as archives:
        rows = archives.readlines()

    # Newest archive first: rows are appended chronologically.
    items = []
    for row in reversed(rows):
        # Split on the first comma only so hrefs containing commas survive.
        data = row.strip().split(',', 1)

        try:
            stamp = datetime.datetime.strptime(data[0], '%Y%m%d%H%M%S')
            link = data[1]
        except (ValueError, IndexError):
            # Skip rows without a valid timestamp or without an href column.
            continue

        label = stamp.strftime('%Y-%m-%d %H:%M:%S')
        items.append('<li><a href="' + link + '">' + label + '</a></li>')

    index = index.replace('%SITE-ARCHIVES%', '\n' + '\n'.join(items) + '\n')

    with open(index_path, 'w', encoding='utf8') as file:
        file.write(index)

 def sign(zeronet, address, privkey):
    """Invoke ``<zeronet> siteSign <address> <privkey>`` and return its status.

    The private key is handed to the child process as a plain command-line
    argument. The value returned is whatever ``execute`` returns.
    """
    status = execute(zeronet, 'siteSign', address, privkey)
    return status

 def publish(zeronet, address):
    """Invoke ``<zeronet> sitePublish <address>`` and return its status.

    The value returned is whatever ``execute`` returns.
    """
    status = execute(zeronet, 'sitePublish', address)
    return status

 def main():
    """Command-line entry point: mirror a website, index it, sign and publish.

    Parses the command line, downloads the given website(s) into a
    timestamped directory below ``--zite``, rebuilds the navigation page and
    then signs and publishes the ZeroNet site. Exits with status 1 as soon as
    the download, signing or publishing step reports a non-zero status, or
    when called without any arguments.
    """
    parser = argparse.ArgumentParser(
        prog=__package__,
        description='Download website and publish it to ZeroNet'
    )

    parser.add_argument('website', type=str, nargs='+', help='website URLs to download')
    parser.add_argument('--depth', type=int, default=5, help='depth to clone')

    parser.add_argument('--zite', required=True, help='path to ZeroNet site')
    parser.add_argument('--address', required=True, help='address of ZeroNet site')
    parser.add_argument('--privkey', required=True, help='private key of ZeroNet site')

    parser.add_argument('--zeronet', required=True, help='path to ZeroNet installation')
    parser.add_argument('--httrack', required=True, help='path to HTTrack installation')

    # Show the full help (not just a terse usage error) on a bare invocation.
    if len(sys.argv) == 1:
        parser.print_help(sys.stderr)
        sys.exit(1)

    args = parser.parse_args()

    # UTC timestamp doubles as the archive directory name; navigation()
    # parses it back with the same '%Y%m%d%H%M%S' format.
    timestamp = datetime.datetime.now(datetime.timezone.utc).strftime('%Y%m%d%H%M%S')
    archive = os.path.join(args.zite, timestamp)

    print('Downloading website')
    st = download(args.httrack, args.depth, archive, os.path.join(args.zite, 'cache'), *args.website)
    if st:
        print('Error while downloading website', file=sys.stderr)
        sys.exit(1)

    print('Creating navigation page')
    navigation(args.zite, archive, args.website[0])

    print('Signing site')
    st = sign(args.zeronet, args.address, args.privkey)
    if st:
        print('Error while signing site', file=sys.stderr)
        sys.exit(1)

    print('Publishing site')
    # Previously this step signed the site a second time and never published.
    st = publish(args.zeronet, args.address)
    if st:
        print('Error while publishing site', file=sys.stderr)
        sys.exit(1)

    print('Done')

 # Run only when executed as a script, so importing the module has no side effects.
 if __name__ == '__main__':
    main()
diff --git a/template.html b/template.html
 <!DOCTYPE html>

 <html>
    <head>
        <title>ZeroArchive for %SITE-TITLE%</title>

        <meta charset="utf-8" />
        <meta http-equiv="content-type" content="text/html; charset=utf-8" />
        <meta name="viewport" content="width=device-width, initial-scale=1" />

        <base href="" target="_top" id="base" />
        <script>base.href = document.location.href.replace("/media", "").replace("index.html", "").replace(/[&?]wrapper=False/, "").replace(/[&?]wrapper_nonce=[A-Za-z0-9]+/, "")</script>

        <style type="text/css">
            body {
                background-color: #f0f0f2;
                margin: 0;
                padding: 0;
                font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
            }
            div {
                width: 1500px;
                margin: 5em auto;
                padding: 50px;
                background-color: #fff;
                border-radius: 1em;
            }
            hr {
                border: 0;
                border-top: 1px solid #8c8c8c;
                border-bottom: 1px solid #fff;
            }
            a:link, a:visited {
                color: #38488f;
                text-decoration: none;
            }
            @media (max-width: 1500px) {
                body {
                    background-color: #fff;
                }
                div {
                    width: auto;
                    margin: 0 auto;
                    border-radius: 0;
                    padding: 1em;
                }
            }
        </style>
    </head>

    <body>
        <div>
            <h1>ZeroArchive for %SITE-TITLE%</h1>

            <p>This site is ZeroArchive mirror for <a href="%SITE-ORIGINAL%">%SITE-TITLE%</a>.</p>
            <p>It uses <a href="https://www.httrack.com/">HTTrack</a> for mirroring websites and <a href="https://gist.github.com/filips123/eafe298f7b4f1bafefcf494d4ae26e51">ZeroArchive</a> for publishing them to ZeroNet. Please see ZeroArchive website if you want to create a mirror for your website.</p>

            <hr />

            <p><strong>Available archives:</strong></p>
            <ul>%SITE-ARCHIVES%</ul>
        </div>
    </body>
 </html>
	from urllib.parse import urlparse
	from shutil import copyfile

	import subprocess
	import datetime
	import argparse
	import sys
	import os
	import re

	def execute(*command, stdout=False, stderr=True):
	    """Run *command* as a child process and return its exit status.

	    Args:
	        *command: Program and arguments, passed to ``subprocess.call`` as a
	            sequence (no shell is involved).
	        stdout: When truthy the child writes to ``sys.stdout``; otherwise its
	            standard output is discarded.
	        stderr: When truthy the child writes to ``sys.stderr``; otherwise its
	            standard error is discarded.

	    Returns:
	        The integer return code reported by ``subprocess.call``.
	    """
	    # subprocess.DEVNULL lets subprocess open *and close* the null device
	    # itself, so no file handle is leaked on every call.
	    out = sys.stdout if stdout else subprocess.DEVNULL
	    err = sys.stderr if stderr else subprocess.DEVNULL

	    return subprocess.call(command, stdout=out, stderr=err)

	def download(httrack, depth, location, cache, url, *args):
	    """Mirror *url* with HTTrack and return HTTrack's exit status.

	    Args:
	        httrack: Path of the HTTrack executable.
	        depth: Mirroring depth, passed as ``--depth=<depth>``.
	        location: First component of HTTrack's ``-O`` option.
	        cache: Second component of HTTrack's ``-O`` option
	            (``-O location,cache``).
	        url: First URL to mirror.
	        *args: Extra command-line arguments appended verbatim (``main``
	            passes any additional website URLs here).

	    Returns:
	        The return code of the HTTrack process, as reported by ``execute``.
	    """
	    # The command is executed without a shell, so a "$(httrack --version)"
	    # substitution (and surrounding quotes) would be sent to servers
	    # literally. Ask the binary for its version banner instead.
	    try:
	        banner = subprocess.run(
	            [httrack, '--version'],
	            stdout=subprocess.PIPE,
	            stderr=subprocess.DEVNULL,
	            universal_newlines=True,
	        ).stdout.strip()
	    except OSError:
	        banner = ''

	    agent = (banner.splitlines()[0] if banner else 'HTTrack') + '; ZeroArchive ()'

	    cmd = [
	        httrack, url, '-O', location + ',' + cache, '-I0', '--depth=' + str(depth),
	        '--display=2', '--timeout=60', '--retries=99', '--sockets=7',
	        '--connection-per-second=5', '--max-rate=250000', '--keep-alive',
	        '--mirror', '--clean', '--robots=0',
	        '--user-agent', agent,
	        *args
	    ]

	    return execute(*cmd)

	def navigation(zite, archive, url):
	    """Regenerate the site's landing page with a link to every archive.

	    Copies ``template.html`` (resolved against the current working
	    directory) to ``<zite>/index.html``, appends the new archive to
	    ``<zite>/archives.csv`` and fills in the ``%SITE-ORIGINAL%``,
	    ``%SITE-TITLE%`` and ``%SITE-ARCHIVES%`` placeholders.

	    Args:
	        zite: Directory of the ZeroNet site.
	        archive: Directory of the mirror that was just downloaded; its base
	            name is recorded as the archive name.
	        url: Original website URL; used for the back-link and to locate the
	            mirrored start page inside *archive*.
	    """
	    index_path = os.path.join(zite, 'index.html')
	    csv_path = os.path.join(zite, 'archives.csv')

	    copyfile('template.html', index_path)

	    parsed = urlparse(url)
	    hostname = parsed.netloc.replace(':', '_')

	    # Map the URL path to the mirrored start page. Directory-like paths
	    # ("", "/" or anything ending in "/") resolve to index.html; a bare
	    # "/" would otherwise produce the non-existent file ".html".
	    page = parsed.path[1:]
	    if not page or page.endswith('/'):
	        page += 'index.html'
	    elif not page.lower().endswith(('.html', '.htm')):
	        page += '.html'

	    name = os.path.basename(os.path.normpath(archive))
	    href = os.path.normpath(os.path.join(name, hostname, page)).replace('\\', '/')
	    with open(csv_path, 'a') as archives:
	        archives.write(name + ',' + href + '\n')

	    # Use the first <title> found in the start page; fall back to a generic
	    # title when the page is missing or has no single-line <title> element.
	    title = 'Website'
	    pattern = re.compile(r'<title>(.*)<\/title>')
	    try:
	        with open(os.path.join(archive, hostname, page), encoding='utf8', errors='replace') as source:
	            for line in source:
	                match = pattern.search(line)
	                if match:
	                    title = match.group(1)
	                    break
	    except FileNotFoundError:
	        # The CSV row is already written; still produce a usable index page.
	        pass

	    with open(index_path, 'r', encoding='utf8') as file:
	        index = file.read()

	    index = index.replace('%SITE-ORIGINAL%', url)
	    index = index.replace('%SITE-TITLE%', title)

	    with open(csv_path) as archives:
	        rows = archives.readlines()

	    # Newest archive first: rows are appended chronologically.
	    items = []
	    for row in reversed(rows):
	        # Split on the first comma only so hrefs containing commas survive.
	        data = row.strip().split(',', 1)

	        try:
	            stamp = datetime.datetime.strptime(data[0], '%Y%m%d%H%M%S')
	            link = data[1]
	        except (ValueError, IndexError):
	            # Skip rows without a valid timestamp or without an href column.
	            continue

	        label = stamp.strftime('%Y-%m-%d %H:%M:%S')
	        items.append('<li><a href="' + link + '">' + label + '</a></li>')

	    index = index.replace('%SITE-ARCHIVES%', '\n' + '\n'.join(items) + '\n')

	    with open(index_path, 'w', encoding='utf8') as file:
	        file.write(index)

	def sign(zeronet, address, privkey):
	    """Invoke ``<zeronet> siteSign <address> <privkey>`` and return its status.

	    The private key is handed to the child process as a plain command-line
	    argument. The value returned is whatever ``execute`` returns.
	    """
	    return execute(zeronet, 'siteSign', address, privkey)

	def publish(zeronet, address):
	    """Invoke ``<zeronet> sitePublish <address>`` and return its status.

	    The value returned is whatever ``execute`` returns.
	    """
	    return execute(zeronet, 'sitePublish', address)

	def main():
	    """Command-line entry point: mirror a website, index it, sign and publish.

	    Parses the command line, downloads the given website(s) into a
	    timestamped directory below ``--zite``, rebuilds the navigation page and
	    then signs and publishes the ZeroNet site. Exits with status 1 as soon as
	    the download, signing or publishing step reports a non-zero status, or
	    when called without any arguments.
	    """
	    parser = argparse.ArgumentParser(
	        prog=__package__,
	        description='Download website and publish it to ZeroNet'
	    )

	    parser.add_argument('website', type=str, nargs='+', help='website URLs to download')
	    parser.add_argument('--depth', type=int, default=5, help='depth to clone')

	    parser.add_argument('--zite', required=True, help='path to ZeroNet site')
	    parser.add_argument('--address', required=True, help='address of ZeroNet site')
	    parser.add_argument('--privkey', required=True, help='private key of ZeroNet site')

	    parser.add_argument('--zeronet', required=True, help='path to ZeroNet installation')
	    parser.add_argument('--httrack', required=True, help='path to HTTrack installation')

	    # Show the full help (not just a terse usage error) on a bare invocation.
	    if len(sys.argv) == 1:
	        parser.print_help(sys.stderr)
	        sys.exit(1)

	    args = parser.parse_args()

	    # UTC timestamp doubles as the archive directory name; navigation()
	    # parses it back with the same '%Y%m%d%H%M%S' format.
	    timestamp = datetime.datetime.now(datetime.timezone.utc).strftime('%Y%m%d%H%M%S')
	    archive = os.path.join(args.zite, timestamp)

	    print('Downloading website')
	    st = download(args.httrack, args.depth, archive, os.path.join(args.zite, 'cache'), *args.website)
	    if st:
	        print('Error while downloading website', file=sys.stderr)
	        sys.exit(1)

	    print('Creating navigation page')
	    navigation(args.zite, archive, args.website[0])

	    print('Signing site')
	    st = sign(args.zeronet, args.address, args.privkey)
	    if st:
	        print('Error while signing site', file=sys.stderr)
	        sys.exit(1)

	    print('Publishing site')
	    # This step must call publish(); calling sign() again never publishes.
	    st = publish(args.zeronet, args.address)
	    if st:
	        print('Error while publishing site', file=sys.stderr)
	        sys.exit(1)

	    print('Done')

	# Run only when executed as a script, so importing the module has no side effects.
	if __name__ == '__main__':
	    main()
	<!DOCTYPE html>

	<html>
	<head>
	<title>ZeroArchive for %SITE-TITLE%</title>

	<meta charset="utf-8" />
	<meta http-equiv="content-type" content="text/html; charset=utf-8" />
	<meta name="viewport" content="width=device-width, initial-scale=1" />

	<base href="" target="_top" id="base" />
	<script>base.href = document.location.href.replace("/media", "").replace("index.html", "").replace(/[&?]wrapper=False/, "").replace(/[&?]wrapper_nonce=[A-Za-z0-9]+/, "")</script>

	<style type="text/css">
	body {
	background-color: #f0f0f2;
	margin: 0;
	padding: 0;
	font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
	}
	div {
	width: 1500px;
	margin: 5em auto;
	padding: 50px;
	background-color: #fff;
	border-radius: 1em;
	}
	hr {
	border: 0;
	border-top: 1px solid #8c8c8c;
	border-bottom: 1px solid #fff;
	}
	a:link, a:visited {
	color: #38488f;
	text-decoration: none;
	}
	@media (max-width: 1500px) {
	body {
	background-color: #fff;
	}
	div {
	width: auto;
	margin: 0 auto;
	border-radius: 0;
	padding: 1em;
	}
	}
	</style>
	</head>

	<body>
	<div>
	<h1>ZeroArchive for %SITE-TITLE%</h1>

	<p>This site is ZeroArchive mirror for <a href="%SITE-ORIGINAL%">%SITE-TITLE%</a>.</p>
	<p>It uses <a href="https://www.httrack.com/">HTTrack</a> for mirroring websites and <a href="https://gist.github.com/filips123/eafe298f7b4f1bafefcf494d4ae26e51">ZeroArchive</a> for publishing them to ZeroNet. Please see ZeroArchive website if you want to create a mirror for your website.</p>

	<hr />

	<p><strong>Available archives:</strong></p>
	<ul>%SITE-ARCHIVES%</ul>
	</div>
	</body>
	</html>