Forked from eighthave/find-https-debian-archives.py
Last active
May 26, 2024 03:45
-
-
Save HacKanCuBa/e3a998d68a82f81dbf11f2cce4f26d04 to your computer and use it in GitHub Desktop.
Script to find official Debian mirrors that support HTTPS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""Find Debian HTTPS archives.

Script based on https://gist.github.com/eighthave/7285154

I made it asynchronous and parallel; overall I measured it to be 6 times
faster or more.

Requires Python 3.7+

Additional resources, not exactly related to this script, that could be
helpful for those who are looking for mirrors:

* https://wiki.debian.org/DebianGeoMirror#deb.debian.org
* https://deb.debian.org/ <-- this one works through https
* https://debgen.simplylinux.ch
"""
import asyncio | |
import concurrent.futures | |
import http.client | |
import re | |
import ssl | |
import sys | |
import typing | |
import urllib.error | |
import urllib.request | |
# Fail fast with a readable message on interpreters that are too old.  An
# explicit check is used instead of ``assert`` because assertions are removed
# when Python runs with -O, which would silently disable the guard.
if sys.version_info < (3, 7):
    raise SystemExit('You need Python 3.7+ to run this script')

# Maximum number of worker threads used to probe mirrors at the same time.
# Increasing this value has potential to make it faster, but there's also a
# chance for it to be slower. There's no way to know but trying, and
# nevertheless you might be using this script only once per machine.
PARALLEL_COUNT: int = 64
def try_url(
    url: str,
    timeout: float = 1,
) -> typing.Optional[http.client.HTTPResponse]:
    """Open *url* and return the response, or ``None`` when the request fails.

    Failures are never raised to the caller; each one is reported on stdout
    with a prefix describing its kind ('Bad TLS!', 'Failure!' or
    'Unexpected failure!').

    Args:
        url: The URL to open.
        timeout: Seconds passed to ``urlopen`` as its timeout.

    Returns:
        The open response object on success (the caller is responsible for
        closing it), ``None`` otherwise.
    """
    try:
        return urllib.request.urlopen(url, timeout=timeout)
    except urllib.error.URLError as err:
        # urlopen() wraps connection-level errors, including TLS handshake
        # and certificate failures, in URLError and exposes the original
        # exception as ``reason``; look there to tell TLS problems apart.
        if isinstance(err.reason, ssl.SSLError):
            print('Bad TLS!', url, err)
        else:
            print('Failure!', url, err)
    except ssl.SSLError as err:
        # Covers SSLCertVerificationError too (it is a subclass), in case a
        # TLS error escapes without being wrapped.
        print('Bad TLS!', url, err)
    except Exception as err:
        # Best-effort probe: anything else (e.g. a malformed URL) is reported
        # and treated as "not available".
        print('Unexpected failure!', url, err)
    return None
async def search_mirror_list(mirror_list_url: str,
                             url_pattern: str) -> typing.Set[str]:
    """Scrape a mirror list page and return the mirrors reachable over HTTPS.

    The page at *mirror_list_url* is read line by line.  Every line matching
    *url_pattern* contributes a candidate URL built as ``'https'`` followed by
    the pattern's first capture group.  All candidates are then probed in
    parallel on a thread pool of up to ``PARALLEL_COUNT`` workers.

    Args:
        mirror_list_url: URL of the page listing the mirrors.
        url_pattern: Regular expression applied with ``re.match`` to each
            decoded line; group 1 must hold the URL without its ``http``
            (or ``https``) prefix.

    Returns:
        The set of candidate URLs that could be opened.  Empty when the list
        page itself could not be fetched.
    """
    def probe(candidate: str) -> typing.Optional[str]:
        """Return *candidate* if it can be opened, ``None`` otherwise."""
        print('Trying:', candidate, '...')
        sys.stdout.flush()
        response = try_url(candidate)
        if response is None:
            return None
        # Only reachability matters here; release the connection right away
        # instead of leaving it open until garbage collection.
        response.close()
        print('Success!', candidate)
        return candidate

    listing = try_url(mirror_list_url)
    if listing is None:
        return set()

    pattern = re.compile(url_pattern)
    candidates = set()
    # The context manager closes the list page once it has been parsed.
    with listing:
        for raw_line in listing:
            # Undecodable bytes are replaced rather than aborting the scan.
            found = pattern.match(raw_line.decode(errors='replace'))
            if found:
                candidates.add(f'https{found.group(1)}')

    loop = asyncio.get_running_loop()
    with concurrent.futures.ThreadPoolExecutor(max_workers=PARALLEL_COUNT) as executor:
        pending = [
            loop.run_in_executor(executor, probe, candidate)
            for candidate in candidates
        ]
        outcomes = await asyncio.gather(*pending)

    # probe() yields None for unreachable candidates; keep only the hits.
    return {reachable for reachable in outcomes if reachable}
async def search_generic_mirrors() -> typing.Set[str]:
    """Find the generic package mirrors that answer over HTTPS.

    Candidates are scraped from https://www.debian.org/mirror/list, and
    ``https://deb.debian.org`` is always added to the result.

    Returns:
        The set of HTTPS mirror URLs.
    """
    # The scheme is matched as ``https?`` and kept outside the capture group:
    # search_mirror_list() prepends 'https' to group 1, so capturing the 's'
    # of an already-HTTPS link would produce 'httpss://...'.  ``[^"]*`` stops
    # the capture at the closing quote of the href attribute.
    https = await search_mirror_list(
        'https://www.debian.org/mirror/list',
        r'.*<td valign="top"><a rel="nofollow" href="https?(:[^"]*)">.*',
    )
    https.add('https://deb.debian.org')  # It doesn't show up in the mirror list
    return https
async def search_security_mirrors() -> typing.Set[str]:
    """Find the security-update mirrors that answer over HTTPS.

    Candidates are scraped from https://www.debian.org/mirror/list-full,
    using the "Security updates over HTTP" links that point at a
    ``/debian-security/`` path.

    Returns:
        The set of HTTPS mirror URLs.
    """
    # The scheme is matched as ``https?`` and kept outside the capture group:
    # search_mirror_list() prepends 'https' to group 1, so capturing the 's'
    # of an already-HTTPS link would produce 'httpss://...'.  ``[^"]*`` stops
    # the capture at the closing quote of the href attribute.
    return await search_mirror_list(
        'https://www.debian.org/mirror/list-full',
        r'.*</tt><br>Security updates over HTTP: <tt><a rel="nofollow" '
        r'href="https?(:[^"]*)">.*/debian-security/</a>.*',
    )
async def search_backports_mirrors() -> typing.Set[str]:
    """Find the backports mirrors that answer over HTTPS.

    Candidates are scraped from https://backports-master.debian.org/Mirrors/,
    using the table links that point at a ``/debian-backports/`` path.

    Returns:
        The set of HTTPS mirror URLs.
    """
    # The scheme is matched as ``https?`` and kept outside the capture group:
    # search_mirror_list() prepends 'https' to group 1, so capturing the 's'
    # of an already-HTTPS link would produce 'httpss://...'.  ``[^"]*`` stops
    # the capture at the closing quote of the href attribute.
    return await search_mirror_list(
        'https://backports-master.debian.org/Mirrors/',
        r'.*<td><a href="https?(:[^"]*)">.*/debian-backports/</a>.*',
    )
async def search_cd_mirrors() -> typing.Set[str]:
    """Find the CD image mirrors that answer over HTTPS.

    Candidates are scraped from https://www.debian.org/CD/http-ftp/, using
    the links labelled "HTTP" in that page's list items.

    Returns:
        The set of HTTPS mirror URLs.
    """
    list_url = 'https://www.debian.org/CD/http-ftp/'
    # Group 1 starts at the ':' so that only plain ``http:`` links match.
    href_pattern = r'.*<a rel="nofollow" href="http(:.*)">HTTP</a></li>.*'
    cd_mirrors = await search_mirror_list(list_url, href_pattern)
    return cd_mirrors
def save_to_file(mirrors: typing.Dict[str, typing.Set[str]], filename: str) -> None:
    """Write the mirrors to *filename*, one titled section per category.

    Each section consists of a ``'<Category> repos'`` heading, a dashed
    underline, one URL per line and a trailing blank line.  Categories are
    written in the mapping's iteration order.

    Args:
        mirrors: Mapping of category name to the URLs found for it.
        filename: Path of the text file to create or overwrite.
    """
    # An explicit encoding keeps the output independent of the platform's
    # default locale encoding.
    with open(filename, 'w', encoding='utf-8') as f:
        for category, urls in mirrors.items():
            f.write(f'{category.title()} repos\n')
            f.write('---------------\n')
            # Sets have no defined order; sorting makes the file reproducible
            # from one run to the next.
            f.writelines(f'{url}\n' for url in sorted(urls))
            f.write('\n')
async def main() -> None:
    """Run every mirror search in turn and save the results to a text file.

    The four categories (apt, security, backports, cd) are searched one after
    another and written to ``https-debian-archives.txt`` in the current
    working directory.
    """
    print('Searching HTTPS mirrors...')
    # Dict displays evaluate their values top to bottom, so the searches are
    # awaited sequentially in the order listed here.
    mirrors = {
        'apt': await search_generic_mirrors(),
        'security': await search_security_mirrors(),
        'backports': await search_backports_mirrors(),
        'cd': await search_cd_mirrors(),
    }

    filename = 'https-debian-archives.txt'
    print('Saving to', filename, '...')
    save_to_file(mirrors, filename)


if __name__ == '__main__':
    asyncio.run(main())
Thank you! I found it! 💯
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hey there! Thanks :)
This script reads the official Debian mirror list to find mirrors, and parses it. I've just checked and there is 1 onion mirror, mentioned in a comment (which is skipped by the script): lxpizzamm6twgep2.onion. Check https://www.debian.org/mirror/list-full