Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save HacKanCuBa/e3a998d68a82f81dbf11f2cce4f26d04 to your computer and use it in GitHub Desktop.
Save HacKanCuBa/e3a998d68a82f81dbf11f2cce4f26d04 to your computer and use it in GitHub Desktop.
Script to find official Debian mirrors that support HTTPS
#!/usr/bin/env python3
"""Find Debian HTTPS archives.
Script based on https://gist.github.com/eighthave/7285154
I made it asynchronous and parallel, so overall I measured it to be 6 times faster or more.
Requires Python 3.7+
Additional resources, not directly related to this script, that could be helpful
for those who are looking for mirrors:
* https://wiki.debian.org/DebianGeoMirror#deb.debian.org
* https://deb.debian.org/ <-- this one works through https
* https://debgen.simplylinux.ch
"""
import asyncio
import concurrent.futures
import http.client
import re
import ssl
import sys
import typing
import urllib.error
import urllib.request
# Refuse to run on interpreters older than 3.7.
# NOTE(review): `assert` statements are skipped when Python runs with -O, so
# this guard is not enforced in that mode.
assert sys.version_info >= (3, 7), 'You need Python 3.7+ to run this script'
# Maximum number of worker threads used to probe mirror URLs concurrently.
# Increasing this value may make the scan faster, but it may also make it
# slower; the only way to know is to try. Since you will probably run this
# script only once per machine, tuning it is rarely worth the effort.
PARALLEL_COUNT: int = 64
def try_url(url: str, timeout: float = 1) -> typing.Optional[http.client.HTTPResponse]:
    """Open *url* and return the response, or ``None`` if the request fails.

    Every failure is reported on stdout and swallowed, so callers only need
    to check whether a response came back.

    Args:
        url: The URL to open.
        timeout: Seconds to wait before giving up on the request.

    Returns:
        The open response object on success (the caller is responsible for
        closing it), otherwise ``None``.
    """
    try:
        return urllib.request.urlopen(url, timeout=timeout)
    except urllib.error.URLError as err:
        # urlopen wraps connection-time errors, including TLS handshake and
        # certificate verification failures, in URLError and exposes the
        # underlying cause as `reason`; inspect it to report TLS problems.
        if isinstance(err.reason, ssl.SSLError):
            print('Bad TLS!', url, err)
        else:
            print('Failure!', url, err)
    except ssl.SSLError as err:
        # SSLCertVerificationError is a subclass of SSLError, so this single
        # clause covers both.
        print('Bad TLS!', url, err)
    except Exception as err:
        # Deliberate best-effort: one misbehaving mirror must not abort the scan.
        print('Unexpected failure!', url, err)
    return None
async def search_mirror_list(mirror_list_url: str,
                             url_pattern: str) -> typing.Set[str]:
    """Return the mirrors listed at *mirror_list_url* that answer over HTTPS.

    The list page is downloaded and each line is matched against
    *url_pattern*. For every match, group 1 is appended to ``'https'`` to
    build a candidate URL. The candidates are then probed concurrently in a
    thread pool of up to ``PARALLEL_COUNT`` workers.

    Args:
        mirror_list_url: URL of the page that lists the mirrors.
        url_pattern: Regular expression applied to each decoded line; its
            first group must capture the part of the link after ``http``.

    Returns:
        The set of candidate URLs that could be opened, or an empty set if
        the list page itself could not be fetched.
    """

    def inner(url_: str) -> typing.Optional[str]:
        """Probe one candidate URL; return it on success, else ``None``."""
        print('Trying:', url_, '...')
        sys.stdout.flush()
        response = try_url(url_)
        if response is None:
            return None
        # Only reachability matters here, so release the connection at once
        # instead of leaving it to the garbage collector.
        response.close()
        print('Success!', url_)
        return url_

    mirrors = try_url(mirror_list_url)
    if mirrors is None:
        return set()

    pattern = re.compile(url_pattern)
    urls = set()
    with mirrors:  # make sure the list page's connection is closed
        for line in mirrors:
            # Tolerate stray non-UTF-8 bytes rather than aborting the scan.
            url_match = pattern.match(line.decode(errors='replace'))
            if url_match:
                urls.add(f'https{url_match.group(1)}')

    with concurrent.futures.ThreadPoolExecutor(max_workers=PARALLEL_COUNT) as executor:
        loop = asyncio.get_running_loop()
        futures = [loop.run_in_executor(executor, inner, url) for url in urls]
        results = await asyncio.gather(*futures)

    # Failed probes come back as None; keep only the URLs that worked.
    return {url for url in results if url}
async def search_generic_mirrors() -> typing.Set[str]:
    """Search the main Debian mirror list for generic mirrors.

    Returns:
        The set produced by ``search_mirror_list`` for that page, plus
        ``https://deb.debian.org``, which is always added.
    """
    list_url = 'https://www.debian.org/mirror/list'
    link_pattern = r'.*<td valign="top"><a rel="nofollow" href="http(.*)">.*'
    found = await search_mirror_list(list_url, link_pattern)
    # It doesn't show up in the mirror list
    found.add('https://deb.debian.org')
    return found
async def search_security_mirrors() -> typing.Set[str]:
    """Search the full Debian mirror list for security-update mirrors.

    Returns:
        The set produced by ``search_mirror_list`` for the
        "Security updates over HTTP" links on that page.
    """
    list_url = 'https://www.debian.org/mirror/list-full'
    link_pattern = (
        r'.*</tt><br>Security updates over HTTP: <tt><a rel="nofollow" '
        r'href="http(.*)">.*/debian-security/</a>.*'
    )
    found = await search_mirror_list(list_url, link_pattern)
    return found
async def search_backports_mirrors() -> typing.Set[str]:
    """Search the backports mirror page for backports mirrors.

    Returns:
        The set produced by ``search_mirror_list`` for the
        ``/debian-backports/`` links on that page.
    """
    list_url = 'https://backports-master.debian.org/Mirrors/'
    link_pattern = r'.*<td><a href="http(.*)">.*/debian-backports/</a>.*'
    found = await search_mirror_list(list_url, link_pattern)
    return found
async def search_cd_mirrors() -> typing.Set[str]:
    """Search the Debian CD download page for CD image mirrors.

    Returns:
        The set produced by ``search_mirror_list`` for the links labelled
        ``HTTP`` on that page.
    """
    list_url = 'https://www.debian.org/CD/http-ftp/'
    link_pattern = r'.*<a rel="nofollow" href="http(:.*)">HTTP</a></li>.*'
    found = await search_mirror_list(list_url, link_pattern)
    return found
def save_to_file(mirrors: typing.Dict[str, typing.Set[str]], filename: str) -> None:
    """Write the mirrors to *filename*, grouped by category.

    Each category produces a title line (``'<Category> repos'``), a dashed
    separator, one URL per line, and a trailing blank line.

    Args:
        mirrors: Mapping of category name to the URLs found for it.
        filename: Path of the text file to create or overwrite.
    """
    with open(filename, 'w', encoding='utf-8') as f:
        for category, urls in mirrors.items():
            f.write(f'{category.title()} repos\n')
            f.write('---------------\n')
            # Sets have no defined order; sort so that repeated runs produce
            # identical, diff-friendly files.
            for url in sorted(urls):
                f.write(f'{url}\n')
            f.write('\n')
async def main() -> None:
    """Search every mirror category and save the results to a text file.

    The categories are searched one after another (generic, security,
    backports, CD) and the collected sets are written to
    ``https-debian-archives.txt`` in the current working directory.
    """
    print('Searching HTTPS mirrors...')
    # Dict values are evaluated top to bottom, so the searches run in the
    # order written here.
    mirrors = {
        'apt': await search_generic_mirrors(),
        'security': await search_security_mirrors(),
        'backports': await search_backports_mirrors(),
        'cd': await search_cd_mirrors(),
    }
    filename = 'https-debian-archives.txt'
    print('Saving to', filename, '...')
    save_to_file(mirrors, filename)
if __name__ == '__main__':
    # Script entry point: run the scan only when executed directly, not when
    # the module is imported.
    asyncio.run(main())
@hellresistor
Copy link

Thank you! I found it! 💯

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment