Skip to content

Instantly share code, notes, and snippets.

@kshirsagarsiddharth
Created September 1, 2020 12:06
Show Gist options
  • Select an option

  • Save kshirsagarsiddharth/67098558af94a9adfab16ccb8e763a53 to your computer and use it in GitHub Desktop.

Select an option

Save kshirsagarsiddharth/67098558af94a9adfab16ccb8e763a53 to your computer and use it in GitHub Desktop.
crawler4
# Entry point for the multi-threaded crawler: collect run parameters,
# spin up N Crawler worker threads sharing one work queue, wait for them
# to finish, and report totals.
print("The Crawler is started")
base_url = input("Please Enter Website to Crawl > ")
number_of_threads = input("Please Enter number of Threads > ")

links_to_crawl = queue.Queue()   # thread-safe frontier of URLs still to visit
url_lock = threading.Lock()      # guards the shared sets/lists across workers
links_to_crawl.put(base_url)     # seed the frontier with the starting URL
have_visited = set()             # URLs already crawled (shared between threads)
crawler_threads = []             # thread handles, kept so we can join() below
error_links = []                 # URLs that failed during crawling

# Convert and validate once, instead of calling int() inline in range() —
# a bad value now fails immediately with a clear message.
thread_count = int(number_of_threads)
if thread_count < 1:
    raise ValueError("Number of threads must be at least 1")

for _ in range(thread_count):
    # NOTE(review): Crawler is defined elsewhere in this project; it is
    # presumably a threading.Thread subclass, since start()/join() are
    # called on it — confirm against its definition.
    crawler = Crawler(base_url=base_url,
                      links_to_crawl=links_to_crawl,
                      have_visited=have_visited,
                      error_links=error_links,
                      url_lock=url_lock)
    crawler.start()
    crawler_threads.append(crawler)

# Block until every worker has drained its share of the queue.
for crawler in crawler_threads:
    crawler.join()

print(f"Total Number of pages visited are {len(have_visited)}")
print(f"Total Number of Erroneous links: {len(error_links)}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment