Created
November 19, 2023 11:43
-
-
Save amuradyan/472b688062bf789f1fb2f451843b3a40 to your computer and use it in GitHub Desktop.
Filtering broken links
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
def filter_working_urls(input_file_path, output_file_path):
    """
    Copy the working URLs from one text file into another and count results.

    The input file is read one URL per line, with surrounding whitespace
    stripped. Each URL is fetched with ``requests.get`` using a 5-second
    timeout. A URL counts as working when the response status code is in
    the 2xx range; it counts as broken when the status code is anything
    else or when the request raises ``requests.RequestException``.
    Blank and whitespace-only lines are skipped and counted as neither.

    :param input_file_path: Path to the file containing the URLs, one per line.
    :param output_file_path: Path to the file where working URLs will be
        written, one per line. The file is opened in ``'w'`` mode, so any
        existing content is replaced.
    :return: Tuple (number of working URLs, number of broken URLs)
    """
    working_count = 0
    broken_count = 0

    with open(input_file_path, 'r') as input_file, \
            open(output_file_path, 'w') as output_file:
        for line in input_file:
            url = line.strip()
            if not url:
                # A blank line (e.g. a trailing newline at the end of the
                # file) is not a URL; requesting it would only inflate the
                # broken count.
                continue

            # Keep the try body to the single call that can raise a
            # request error, so file-write failures are never masked.
            try:
                response = requests.get(url, timeout=5)
            except requests.RequestException:
                broken_count += 1
                continue

            if 200 <= response.status_code < 300:
                output_file.write(url + '\n')
                working_count += 1
            else:
                broken_count += 1

    return working_count, broken_count
def main():
    """Filter the URL list at the configured paths and print a summary.

    Calls ``filter_working_urls`` with the hard-coded input and output
    paths below, then prints the working/broken totals followed by a
    closing message.
    """
    # Edit these two paths to point at your own files.
    input_file_path, output_file_path = 'input_urls.txt', 'working_urls.txt'

    counts = filter_working_urls(input_file_path, output_file_path)
    working_count, broken_count = counts

    summary = f"Filtering complete. {working_count} working URLs and {broken_count} broken URLs."
    print(summary)
    print("Ձեր բռնած գորձին էլ հաջողություն")
# Run the filter only when this file is executed as a script; importing it
# as a module defines the functions without triggering any requests.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment