Last active
March 13, 2023 08:21
-
-
Save erikvanzijst/71fc21fb10b8a112b741b7c152cb6248 to your computer and use it in GitHub Desktop.
Parallel web crawler
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# Simple multi-processing web crawler, following all a.href's that end in a '/'. | |
import os | |
import traceback | |
from itertools import chain | |
from multiprocessing import Pool | |
from time import time | |
from urllib.parse import urljoin | |
from bs4 import BeautifulSoup | |
import requests | |
# Module-level HTTP session; crawl() issues every request through it.
session = requests.Session() | |
# URL prefix of the crawl: the first URL visited, and crawl() discards any
# link that does not start with it.
root = 'http://be.archive.ubuntu.com/ubuntu/dists/bionic/' | |
# Number of worker processes handed to multiprocessing.Pool.
CONCURRENCY = 10 | |
def crawl(url: str) -> set[str]:
    """Fetch *url* and return the directory links found on that page.

    Every ``<a href=...>`` is resolved against *url*; only links that start
    with the module-level ``root`` and end in ``'/'`` are returned.

    A request that raises ``IOError`` (requests' own exceptions derive from
    it) is logged with a traceback and retried after a short, growing pause.
    After ``max_attempts`` failures the page is given up on and an empty set
    is returned, so one dead URL cannot stall the whole crawl.
    """
    # Local import: the file only imports `time.time` at module level.
    from time import sleep

    max_attempts = 5
    timeout_seconds = 30  # without a timeout a stalled server blocks forever

    for attempt in range(1, max_attempts + 1):
        print(f'{os.getpid()} processing {url}')
        try:
            html = session.get(url, timeout=timeout_seconds).content
        except IOError:
            traceback.print_exc()
            if attempt < max_attempts:
                sleep(0.5 * attempt)  # back off instead of hammering the server
            continue
        soup = BeautifulSoup(html, 'html.parser')
        # href=True skips anchors that carry no href attribute at all.
        links = {urljoin(url, a['href']) for a in soup.find_all('a', href=True)}
        return {u for u in links if u.startswith(root) and u.endswith('/')}
    return set()
if __name__ == '__main__':
    # Level-by-level crawl: each pass fans the current frontier out over the
    # process pool, then keeps only the links that were never seen before.
    seen: set[str] = {root}
    frontier: set[str] = set(seen)
    start = time()
    with Pool(processes=CONCURRENCY) as pool:
        while frontier:
            links_per_page = pool.map(crawl, frontier)
            frontier = set().union(*links_per_page) - seen
            seen |= frontier
    print(f'{len(seen)} urls crawled in {time() - start:.2f} seconds ({len(seen) / (time() - start):.2f} urls/second with {CONCURRENCY} processes)')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# Simple multiprocessing web crawler, following all a.href's that end in a '/'. | |
import os | |
import traceback | |
from concurrent.futures import ProcessPoolExecutor | |
from multiprocessing import Barrier | |
from urllib.parse import urljoin | |
from time import time | |
from bs4 import BeautifulSoup | |
import requests | |
# URL prefix of the crawl: the first URL scheduled, and crawl() discards any
# link that does not start with it.
root: str = 'http://be.archive.ubuntu.com/' | |
# Worker count handed to ProcessPoolExecutor.
CONCURRENCY = 48 | |
# Module-level HTTP session; crawl() issues every request through it.
session = requests.Session() | |
def crawl(url: str) -> set[str]:
    """Fetch *url* and return the directory links found on that page.

    Every ``<a href=...>`` is resolved against *url*; only links that start
    with the module-level ``root`` and end in ``'/'`` are returned.

    A request that raises ``IOError`` (requests' own exceptions derive from
    it) is logged with a traceback and retried after a short, growing pause.
    After ``max_attempts`` failures the page is given up on and an empty set
    is returned, so one dead URL cannot stall the whole crawl.
    """
    # Local import: the file only imports `time.time` at module level.
    from time import sleep

    max_attempts = 5
    timeout_seconds = 30  # without a timeout a stalled server blocks forever

    for attempt in range(1, max_attempts + 1):
        print(f'{os.getpid()} processing {url}')
        try:
            html = session.get(url, timeout=timeout_seconds).content
        except IOError:
            traceback.print_exc()
            if attempt < max_attempts:
                sleep(0.5 * attempt)  # back off instead of hammering the server
            continue
        soup = BeautifulSoup(html, 'html.parser')
        # href=True skips anchors that carry no href attribute at all.
        links = {urljoin(url, a['href']) for a in soup.find_all('a', href=True)}
        return {u for u in links if u.startswith(root) and u.endswith('/')}
    return set()
if __name__ == '__main__':
    # Local import: a one-shot Event is all that is needed to tell the main
    # thread "the last page has been processed". Unlike a two-party Barrier it
    # cannot deadlock when a done-callback happens to run on the waiting thread.
    from threading import Event

    # Pre-seeded so this URL is never scheduled.
    # NOTE(review): presumably a self-referencing path on the mirror that
    # would otherwise recurse forever -- confirm. It is included in len(seen).
    seen: set[str] = {'http://be.archive.ubuntu.com/ubuntu/ubuntu/'}
    done = Event()
    inflight = 0  # pages submitted whose done-callback has not finished yet
    start = time()
    with ProcessPoolExecutor(CONCURRENCY) as executor:
        def schedule(url: str) -> None:
            """Mark *url* as seen and submit it to the pool for crawling."""
            global inflight

            def cb(future) -> None:
                """Schedule every new link found by a finished crawl."""
                global inflight
                try:
                    for u in future.result() - seen:
                        schedule(u)
                except Exception:
                    # A failed page (or a failed submit) must not leave the
                    # counter stuck above zero, or the main thread waits forever.
                    traceback.print_exc()
                finally:
                    inflight -= 1
                    if not inflight:
                        done.set()

            seen.add(url)
            future = executor.submit(crawl, url)
            # Count the page only once it is really queued, and before the
            # callback can possibly fire.
            inflight += 1
            future.add_done_callback(cb)

        schedule(root)
        # Pages are submitted dynamically from callbacks, so the executor must
        # stay open until the counter drops back to zero.
        done.wait()
    print(f'{len(seen)} urls crawled in {time() - start:.2f} seconds ({len(seen) / (time() - start):.2f} urls/second with {CONCURRENCY} processes)')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# Simple multithreaded web crawler, following all a.href's that end in a '/'. | |
from concurrent.futures import ThreadPoolExecutor | |
from urllib.parse import urljoin | |
from time import time | |
from threading import current_thread, Barrier, Lock | |
from bs4 import BeautifulSoup | |
import requests | |
# URL prefix of the crawl: the first URL crawled, and only links that start
# with it are followed.
root: str = 'http://be.archive.ubuntu.com/ubuntu/dists/bionic/' | |
# Worker count handed to ThreadPoolExecutor.
CONCURRENCY = 10 | |
if __name__ == '__main__':
    # Local imports: neither name is imported at the top of this script.
    import traceback
    from threading import Event

    # Seed with root so a link back to the start page is not crawled again
    # (previously root was processed twice).
    seen: set[str] = {root}
    # One-shot completion signal; unlike a two-party Barrier it is safe to
    # trigger from the same thread that later waits on it.
    done = Event()
    lock = Lock()  # guards `seen` and `inflight`
    inflight = 1   # pages submitted (or running) that have not finished yet
    start = time()
    # NOTE(review): one Session is shared by all worker threads; requests does
    # not document Session as thread-safe -- confirm this is acceptable.
    with ThreadPoolExecutor(CONCURRENCY) as executor, requests.Session() as session:
        def crawl(url: str) -> None:
            """Fetch *url* and submit every unseen directory link under root."""
            global inflight
            print(f'{current_thread().ident} processing {url}')
            paths: set[str] = set()
            try:
                # Without a timeout a stalled server would block this worker forever.
                html = session.get(url, timeout=30).content
                soup = BeautifulSoup(html, 'html.parser')
                # href=True skips anchors that carry no href attribute at all.
                paths = {urljoin(url, a['href']) for a in soup.find_all('a', href=True)}
            except Exception:
                # Log and treat the page as having no links; the bookkeeping
                # below must still run or the main thread never wakes up.
                traceback.print_exc()
            with lock:
                try:
                    for u in paths - seen:
                        if u.startswith(root) and u.endswith('/'):
                            seen.add(u)
                            executor.submit(crawl, u)
                            inflight += 1
                finally:
                    inflight -= 1
                    if not inflight:
                        done.set()

        crawl(root)
        # Pages are submitted dynamically by the workers, so the executor must
        # stay open until the counter drops back to zero.
        done.wait()
    print(f'{len(seen)} urls crawled in {time() - start:.2f} seconds ({len(seen) / (time() - start):.2f} urls/second with {CONCURRENCY} threads)')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ python mp.py | |
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/ | |
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/ | |
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/ | |
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/ | |
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/by-hash/ | |
45131 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/ | |
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/dep11/ | |
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/binary-i386/ | |
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/dep11/ | |
45133 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/i18n/ | |
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/ | |
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/source/ | |
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/i18n/ | |
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/binary-i386/ | |
45131 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/i18n/ | |
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/debian-installer/ | |
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/by-hash/SHA256/ | |
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/binary-amd64/ | |
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/ | |
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/dist-upgrader-all/ | |
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/debian-installer/ | |
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/ | |
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/i18n/ | |
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/ | |
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/binary-i386/ | |
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/dep11/ | |
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/source/ | |
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/binary-amd64/ | |
45131 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/binary-amd64/ | |
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/source/ | |
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/source/ | |
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/binary-i386/ | |
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/binary-amd64/ | |
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/debian-installer/ | |
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/debian-installer/ | |
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-amd64/ | |
45131 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/dist-upgrader-all/18.04.17/ | |
45133 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/ | |
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub-efi-amd64-amd64/ | |
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-amd64/ | |
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/binary-amd64/by-hash/ | |
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-i386/ | |
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub2-amd64/ | |
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-amd64/ | |
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/debian-installer/binary-i386/ | |
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/binary-i386/by-hash/ | |
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/source/by-hash/ | |
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/binary-i386/by-hash/ | |
45133 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/i18n/by-hash/ | |
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/dep11/by-hash/ | |
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/binary-amd64/by-hash/ | |
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/binary-i386/by-hash/ | |
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/binary-amd64/by-hash/ | |
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/ | |
45131 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-azure-amd64/ | |
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/binary-amd64/by-hash/ | |
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/i18n/by-hash/ | |
45133 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/dist-upgrader-all/current/ | |
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/debian-installer/binary-i386/ | |
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-oem-amd64/ | |
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/source/by-hash/ | |
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-oem-amd64/ | |
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/dist-upgrader-all/18.04.15/ | |
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-azure-amd64/ | |
45131 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/dep11/by-hash/ | |
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/dep11/by-hash/ | |
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/ | |
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/debian-installer/binary-amd64/ | |
45131 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/debian-installer/binary-amd64/ | |
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/debian-installer/binary-i386/ | |
45133 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/i18n/by-hash/ | |
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/dist-upgrader-all/18.04.16/ | |
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/binary-i386/by-hash/ | |
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/source/by-hash/ | |
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/source/by-hash/ | |
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/debian-installer/binary-amd64/ | |
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/i18n/by-hash/ | |
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/debian-installer/binary-i386/ | |
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/ | |
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/debian-installer/binary-amd64/ | |
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-azure-amd64/4.15.0-1009.9/ | |
45133 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub2-amd64/current/ | |
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/debian-installer/binary-amd64/by-hash/ | |
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-azure-amd64/current/ | |
45131 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/i18n/by-hash/SHA256/ | |
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/binary-i386/by-hash/SHA256/ | |
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-oem-amd64/current/ | |
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/debian-installer/binary-i386/by-hash/ | |
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub-efi-amd64-amd64/2.00-5ubuntu2/ | |
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/binary-amd64/by-hash/SHA256/ | |
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-amd64/10-3/ | |
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/binary-amd64/by-hash/SHA256/ | |
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/i18n/by-hash/SHA256/ | |
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-azure-amd64/current/ | |
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-i386/current/ | |
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/debian-installer/binary-i386/by-hash/ | |
45133 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/ | |
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/binary-i386/by-hash/SHA256/ | |
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-azure-amd64/4.15.0-1004.4/ | |
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-amd64/current/ | |
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/dep11/by-hash/SHA256/ | |
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/debian-installer/binary-i386/by-hash/ | |
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/i18n/by-hash/SHA256/ | |
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-i386/10-3/ | |
45133 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/ | |
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/binary-i386/by-hash/SHA256/ | |
45131 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/source/by-hash/SHA256/ | |
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/binary-amd64/by-hash/SHA256/ | |
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-amd64/4.15.0-15.16/ | |
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/source/by-hash/SHA256/ | |
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/dep11/by-hash/SHA256/ | |
45133 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/dep11/by-hash/SHA256/ | |
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/source/by-hash/SHA256/ | |
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub-efi-amd64-amd64/current/ | |
45131 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/i18n/by-hash/SHA256/ | |
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/source/by-hash/SHA256/ | |
45133 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-amd64/current/ | |
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-amd64/4.15.0-20.21/ | |
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/ | |
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/debian-installer/binary-i386/by-hash/ | |
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/debian-installer/binary-amd64/by-hash/ | |
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/debian-installer/binary-amd64/by-hash/ | |
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-amd64/current/ | |
45133 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/debian-installer/binary-amd64/by-hash/ | |
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-oem-amd64/4.15.0-1004.5/ | |
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/binary-i386/by-hash/SHA256/ | |
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-oem-amd64/current/ | |
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-oem-amd64/4.15.0-1002.3/ | |
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/binary-amd64/by-hash/SHA256/ | |
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub2-amd64/2.02-2ubuntu8/ | |
45131 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/ | |
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/cdrom/ | |
45133 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/netboot/ | |
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/debian-installer/binary-i386/by-hash/SHA256/ | |
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub-efi-amd64-amd64/current/control/ | |
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/hd-media/ | |
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/cdrom-xen/ | |
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/cdrom/ | |
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/debian-installer/binary-i386/by-hash/SHA256/ | |
45131 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/netboot/ | |
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/netboot/ | |
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/netboot/ | |
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/debian-installer/binary-i386/by-hash/SHA256/ | |
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/hd-media/ | |
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub2-amd64/2.02-2ubuntu8/control/ | |
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/cdrom/ | |
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-azure-amd64/current/control/ | |
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/hd-media/ | |
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-i386/10-3/control/ | |
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub2-amd64/current/control/ | |
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/debian-installer/binary-i386/by-hash/SHA256/ | |
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/debian-installer/binary-amd64/by-hash/SHA256/ | |
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-oem-amd64/current/control/ | |
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/hd-media/ | |
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-amd64/10-3/control/ | |
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/debian-installer/binary-amd64/by-hash/SHA256/ | |
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-oem-amd64/4.15.0-1002.3/control/ | |
45133 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-amd64/current/control/ | |
45131 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-amd64/4.15.0-15.16/control/ | |
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub-efi-amd64-amd64/2.00-5ubuntu2/control/ | |
45133 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/cdrom-xen/ | |
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/cdrom/ | |
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-amd64/current/control/ | |
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-i386/current/control/ | |
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-azure-amd64/4.15.0-1004.4/control/ | |
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/debian-installer/binary-amd64/by-hash/SHA256/ | |
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/debian-installer/binary-amd64/by-hash/SHA256/ | |
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/cdrom-xen/cdrom/ | |
45131 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/netboot/pxelinux.cfg/ | |
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/netboot/xen/ | |
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/netboot/xen/ | |
45133 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/netboot/ubuntu-installer/ | |
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/netboot/pxelinux.cfg/ | |
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/netboot/xen/ | |
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/cdrom-xen/cdrom/ | |
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/netboot/pxelinux.cfg/ | |
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/netboot/pxelinux.cfg/ | |
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/netboot/ubuntu-installer/ | |
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/netboot/ubuntu-installer/ | |
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/netboot/xen/ | |
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/netboot/ubuntu-installer/ | |
45133 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/cdrom/xen/ | |
45131 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/cdrom/xen/ | |
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/netboot/ubuntu-installer/amd64/ | |
45128 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/cdrom-xen/cdrom/xen/ | |
45130 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/netboot/ubuntu-installer/amd64/ | |
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/cdrom-xen/cdrom/xen/ | |
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/netboot/ubuntu-installer/i386/ | |
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/netboot/ubuntu-installer/i386/ | |
45127 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/netboot/ubuntu-installer/i386/boot-screens/ | |
45131 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/netboot/ubuntu-installer/amd64/boot-screens/ | |
45133 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/netboot/ubuntu-installer/i386/boot-screens/ | |
45132 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/netboot/ubuntu-installer/amd64/pxelinux.cfg/ | |
45129 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/netboot/ubuntu-installer/amd64/boot-screens/ | |
45136 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/netboot/ubuntu-installer/amd64/pxelinux.cfg/ | |
45134 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/netboot/ubuntu-installer/i386/pxelinux.cfg/ | |
45135 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/netboot/ubuntu-installer/i386/pxelinux.cfg/ | |
197 urls crawled in 1.37 seconds (144.05 urls/second with 10 processes) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ python mt.py | |
4554606080 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/ | |
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/ | |
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/ | |
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/ | |
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/by-hash/ | |
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/ | |
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/binary-amd64/ | |
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/i18n/ | |
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/source/ | |
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/dist-upgrader-all/ | |
123145668116480 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/ | |
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/ | |
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/ | |
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/ | |
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/ | |
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/debian-installer/ | |
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/dep11/ | |
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/binary-i386/ | |
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/by-hash/SHA256/ | |
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/debian-installer/ | |
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/source/ | |
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/binary-amd64/ | |
123145668116480 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/i18n/ | |
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/binary-i386/ | |
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/dep11/ | |
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/i18n/ | |
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/binary-i386/ | |
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/binary-amd64/ | |
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/debian-installer/ | |
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/source/ | |
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/binary-amd64/ | |
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/debian-installer/ | |
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/dep11/ | |
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/binary-i386/ | |
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/i18n/ | |
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/source/ | |
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/binary-amd64/by-hash/ | |
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/source/by-hash/ | |
123145668116480 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/ | |
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/ | |
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/dist-upgrader-all/current/ | |
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/dist-upgrader-all/18.04.16/ | |
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/dist-upgrader-all/18.04.15/ | |
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/dist-upgrader-all/18.04.17/ | |
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/ | |
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/ | |
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-azure-amd64/ | |
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub2-amd64/ | |
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-oem-amd64/ | |
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub-efi-amd64-amd64/ | |
123145668116480 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-amd64/ | |
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-amd64/ | |
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-i386/ | |
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/i18n/by-hash/ | |
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-oem-amd64/ | |
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-azure-amd64/ | |
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-amd64/ | |
123145668116480 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/debian-installer/binary-amd64/ | |
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/debian-installer/binary-i386/ | |
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/binary-i386/by-hash/ | |
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/source/by-hash/ | |
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/dep11/by-hash/ | |
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/binary-amd64/by-hash/ | |
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/binary-i386/by-hash/ | |
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/dep11/by-hash/ | |
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/debian-installer/binary-i386/ | |
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/debian-installer/binary-amd64/ | |
123145668116480 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/binary-amd64/by-hash/ | |
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/binary-amd64/by-hash/ | |
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/source/by-hash/ | |
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/debian-installer/binary-i386/ | |
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/i18n/by-hash/ | |
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/debian-installer/binary-amd64/ | |
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/binary-i386/by-hash/ | |
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/debian-installer/binary-amd64/ | |
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/debian-installer/binary-i386/ | |
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/dep11/by-hash/ | |
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/i18n/by-hash/ | |
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/source/by-hash/ | |
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/binary-i386/by-hash/ | |
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/source/by-hash/SHA256/ | |
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/binary-amd64/by-hash/SHA256/ | |
123145668116480 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/ | |
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/ | |
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/ | |
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub2-amd64/2.02-2ubuntu8/ | |
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub2-amd64/current/ | |
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub-efi-amd64-amd64/2.00-5ubuntu2/ | |
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub-efi-amd64-amd64/current/ | |
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-azure-amd64/4.15.0-1004.4/ | |
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-azure-amd64/current/ | |
123145668116480 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-amd64/current/ | |
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-amd64/4.15.0-15.16/ | |
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/ | |
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-amd64/current/ | |
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-amd64/10-3/ | |
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-oem-amd64/4.15.0-1002.3/ | |
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-oem-amd64/current/ | |
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/i18n/by-hash/SHA256/ | |
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-i386/current/ | |
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-i386/10-3/ | |
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/i18n/by-hash/ | |
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-oem-amd64/4.15.0-1004.5/ | |
123145668116480 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-oem-amd64/current/ | |
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/dep11/by-hash/SHA256/ | |
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-amd64/current/ | |
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-amd64/4.15.0-20.21/ | |
123145668116480 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/debian-installer/binary-amd64/by-hash/ | |
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-azure-amd64/4.15.0-1009.9/ | |
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/signed/linux-azure-amd64/current/ | |
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/binary-i386/by-hash/SHA256/ | |
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/binary-i386/by-hash/SHA256/ | |
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/binary-amd64/by-hash/SHA256/ | |
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/source/by-hash/SHA256/ | |
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/debian-installer/binary-i386/by-hash/ | |
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/dep11/by-hash/SHA256/ | |
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/debian-installer/binary-i386/by-hash/ | |
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/binary-amd64/by-hash/SHA256/ | |
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/i18n/by-hash/SHA256/ | |
123145668116480 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/binary-i386/by-hash/SHA256/ | |
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/debian-installer/binary-amd64/by-hash/ | |
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/debian-installer/binary-i386/by-hash/ | |
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/debian-installer/binary-amd64/by-hash/ | |
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/source/by-hash/SHA256/ | |
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/debian-installer/binary-amd64/by-hash/ | |
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/binary-amd64/by-hash/SHA256/ | |
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/dep11/by-hash/SHA256/ | |
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/source/by-hash/SHA256/ | |
123145668116480 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/binary-i386/by-hash/SHA256/ | |
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/i18n/by-hash/SHA256/ | |
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/debian-installer/binary-i386/by-hash/ | |
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/cdrom-xen/ | |
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/netboot/ | |
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/cdrom/ | |
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/hd-media/ | |
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/hd-media/ | |
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/cdrom/ | |
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/netboot/ | |
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/hd-media/ | |
123145668116480 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/netboot/ | |
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/cdrom-xen/ | |
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/cdrom/ | |
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub2-amd64/2.02-2ubuntu8/control/ | |
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub-efi-amd64-amd64/current/control/ | |
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-azure-amd64/current/control/ | |
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub2-amd64/current/control/ | |
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-amd64/10-3/control/ | |
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-oem-amd64/4.15.0-1002.3/control/ | |
123145668116480 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/grub-efi-amd64-amd64/2.00-5ubuntu2/control/ | |
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-oem-amd64/current/control/ | |
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-amd64/current/control/ | |
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/cdrom/ | |
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/netboot/ | |
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/hd-media/ | |
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-amd64/current/control/ | |
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-amd64/4.15.0-15.16/control/ | |
123145668116480 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-i386/10-3/control/ | |
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/linux-azure-amd64/4.15.0-1004.4/control/ | |
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/i18n/by-hash/SHA256/ | |
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/uefi/fwupdate-i386/current/control/ | |
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/debian-installer/binary-amd64/by-hash/SHA256/ | |
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/debian-installer/binary-i386/by-hash/SHA256/ | |
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/debian-installer/binary-i386/by-hash/SHA256/ | |
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/debian-installer/binary-amd64/by-hash/SHA256/ | |
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/debian-installer/binary-amd64/by-hash/SHA256/ | |
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/universe/debian-installer/binary-i386/by-hash/SHA256/ | |
123145668116480 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/cdrom-xen/cdrom/ | |
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/restricted/debian-installer/binary-amd64/by-hash/SHA256/ | |
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/multiverse/debian-installer/binary-i386/by-hash/SHA256/ | |
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/netboot/pxelinux.cfg/ | |
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/netboot/ubuntu-installer/ | |
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/netboot/xen/ | |
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/cdrom/xen/ | |
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/netboot/ubuntu-installer/ | |
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/netboot/pxelinux.cfg/ | |
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/netboot/xen/ | |
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/netboot/ubuntu-installer/ | |
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/netboot/pxelinux.cfg/ | |
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/netboot/xen/ | |
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/cdrom-xen/cdrom/ | |
123145668116480 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/cdrom/xen/ | |
123145517010944 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/netboot/xen/ | |
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/netboot/pxelinux.cfg/ | |
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/netboot/ubuntu-installer/ | |
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/netboot/ubuntu-installer/i386/ | |
123145584168960 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/cdrom-xen/cdrom/xen/ | |
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/netboot/ubuntu-installer/amd64/ | |
123145634537472 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/cdrom-xen/cdrom/xen/ | |
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/netboot/ubuntu-installer/i386/ | |
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/netboot/ubuntu-installer/i386/pxelinux.cfg/ | |
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/20101020ubuntu543/images/netboot/ubuntu-installer/i386/boot-screens/ | |
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/netboot/ubuntu-installer/amd64/ | |
123145600958464 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/netboot/ubuntu-installer/i386/boot-screens/ | |
123145567379456 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-i386/current/images/netboot/ubuntu-installer/i386/pxelinux.cfg/ | |
123145550589952 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/netboot/ubuntu-installer/amd64/boot-screens/ | |
123145617747968 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/20101020ubuntu543/images/netboot/ubuntu-installer/amd64/pxelinux.cfg/ | |
123145651326976 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/netboot/ubuntu-installer/amd64/pxelinux.cfg/ | |
123145533800448 processing http://be.archive.ubuntu.com/ubuntu/dists/bionic/main/installer-amd64/current/images/netboot/ubuntu-installer/amd64/boot-screens/ | |
197 urls crawled in 1.72 seconds (114.49 urls/second with 10 threads) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
beautifulsoup4==4.11.2 | |
certifi==2022.12.7 | |
charset-normalizer==3.1.0 | |
idna==3.4 | |
requests==2.28.2 | |
soupsieve==2.4 | |
urllib3==1.26.14 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# Simple singlethreaded web crawler, following all a.href's that end in a '/'. | |
from urllib.parse import urljoin | |
from time import time | |
from bs4 import BeautifulSoup | |
import requests | |
# Only URLs underneath this prefix (and ending in '/') are followed.
root: str = 'http://be.archive.ubuntu.com/ubuntu/dists/bionic/'


if __name__ == '__main__':
    # Seed with the root itself: listings link back to their parent, and
    # without this the root page would be fetched and parsed a second time.
    seen: set[str] = {root}
    start = time()

    with requests.Session() as session:
        def crawl(url: str) -> None:
            """Fetch *url* and recursively crawl every unseen directory link.

            A link is followed when, resolved against *url*, it starts with
            ``root`` and ends in ``'/'``. Each followed URL is recorded in the
            enclosing ``seen`` set before recursing, so it is visited once.
            """
            print(f'processing {url}')
            html = session.get(url).content
            soup = BeautifulSoup(html, 'html.parser')
            paths = {urljoin(url, a.get('href')) for a in soup.find_all('a')}
            for u in paths:
                # Check `seen` at iteration time rather than against a
                # snapshot taken before the loop: recursive calls below add
                # to `seen`, and a stale snapshot would re-crawl those URLs.
                if u in seen:
                    continue
                if not (u.startswith(root) and u.endswith('/')):
                    continue
                seen.add(u)
                crawl(u)

        crawl(root)

    # Sample the clock once so the reported duration and rate are consistent.
    elapsed = time() - start
    print(f'{len(seen)} urls crawled in {elapsed:.2f} seconds ({len(seen) / elapsed:.2f} urls/second)')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment