Last active
August 20, 2024 16:26
-
-
Save Xnuvers007/2afc71474d3f28f9cbee7d2ecc661cfa to your computer and use it in GitHub Desktop.
Scrapes www.isitdownrightnow.com to check whether a website is up or down
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from flask import Flask, request, jsonify | |
import requests | |
from requests.exceptions import RequestException | |
import time | |
# Flask application instance shared by the routes defined below.
app = Flask(__name__)
def normalize_url(url):
    """Return *url* with an explicit scheme, defaulting to plain HTTP."""
    has_scheme = url.startswith('http://') or url.startswith('https://')
    return url if has_scheme else f'http://{url}'
def check_url_status(url):
    """Probe *url* and return ``(response, elapsed_seconds, final_url)``.

    For a plain-HTTP URL, an HTTPS upgrade is attempted first and plain HTTP
    is used as the fallback.  Returns ``(None, None, None)`` when every
    attempt raises a :class:`requests.exceptions.RequestException`.
    """
    # Browser-like UA so targets do not reject the probe as a bot.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36'}
    normalized_url = normalize_url(url)
    start_time = time.time()

    # Candidate URLs in preference order: HTTPS upgrade first, then as given.
    candidates = []
    if normalized_url.startswith('http://'):
        candidates.append(normalized_url.replace('http://', 'https://', 1))
    candidates.append(normalized_url)

    for candidate in candidates:
        try:
            # timeout prevents a dead host from hanging the request forever
            response = requests.get(candidate, headers=headers, timeout=10)
        except RequestException:
            continue
        return response, time.time() - start_time, candidate

    # Every candidate failed at the transport level.
    return None, None, None
def _build_payload(status, status_text, url, response_time=None,
                   redirects=None, headers=None, body=None):
    """Uniform JSON body for every /check-url outcome.

    ``isUP``/``isDown`` are derived from *status*: only HTTP 200 counts as up.
    """
    return {
        'status': status,
        'statusText': status_text,
        'isUP': status == 200,
        'isDown': status != 200,
        'responseTime': response_time,
        'redirects': redirects,
        'headers': headers,
        'zbody': body,
        'url': url,
    }


@app.route('/check-url', methods=['GET'])
def check_url():
    """Check availability of the ``url`` query parameter.

    Returns 400 when the parameter is missing, 500 on an unexpected error,
    and 200 otherwise (including the "both transports failed" case).
    """
    url = request.args.get('url')
    if not url:
        return jsonify(_build_payload(0, 'URL parameter is missing', None)), 400
    try:
        response, response_time, final_url = check_url_status(url)
        if response:
            redirects = response.history if response.history else []
            redirect_urls = [r.url for r in redirects] + [response.url]
            return jsonify(_build_payload(
                response.status_code, response.reason, final_url,
                response_time=response_time,
                redirects=redirect_urls if redirects else None,
                headers=dict(response.headers),
                body=response.text,
            )), 200
        # Transport-level failure on both HTTPS and HTTP.
        return jsonify(_build_payload(
            0, 'Both HTTP and HTTPS requests failed', normalize_url(url))), 200
    except Exception as e:
        # Last-resort boundary: report the error in the same payload shape.
        return jsonify(_build_payload(0, str(e), normalize_url(url))), 500
if __name__ == '__main__':
    # Development server only; use a production WSGI server for deployment.
    app.run(debug=False)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from bs4 import BeautifulSoup | |
from prettytable import PrettyTable | |
import threading, psutil, re | |
from urllib.parse import urlparse | |
# Maximum number of times to re-request a page after a network error.
MAX_RETRY_ATTEMPTS = 5
def process_website(website, user, headers, headers2, params): | |
attempts = 0 | |
success = False | |
while attempts < MAX_RETRY_ATTEMPTS and not success: | |
try: | |
response = requests.get(website, headers=headers, params=params, timeout=25) | |
response.raise_for_status() # Raises an HTTPError for bad responses (4xx or 5xx) | |
soup = BeautifulSoup(response.text, 'html.parser') | |
if website == 'https://www.isitdownrightnow.com/check.php': | |
website_name = soup.find('span', class_='tab').text | |
url_checked = soup.find_all('span', class_='tab')[1].text | |
response_time = soup.find_all('span', class_='tab')[2].text | |
last_down = soup.find_all('span', class_='tab')[3].text | |
status = soup.find('div', class_='statusup').text | |
# Print the extracted information | |
print(f"Website Name: {website_name}") | |
print(f"URL Checked: {url_checked}") | |
print(f"Response Time: {response_time}") | |
print(f"Last Down: {last_down}") | |
if status == 'UP': | |
print(f"Status: {status}") | |
else: | |
print(f"Status: {status}") | |
else: | |
# url = f'https://www.isitdownrightnow.com/{user}.html' | |
table = soup.find('table', {'width': '100%', 'cellspacing': '1', 'cellpadding': '1', 'border': '0'}) | |
rows = table.find_all('tr') | |
data = [] | |
for row in rows: | |
cols = row.find_all(['td', 'th']) | |
cols = [col.text.strip() for col in cols] | |
data.append(cols) | |
header = data[0] | |
unique_header = [] | |
seen = set() | |
for field in header: | |
if field in seen: | |
i = 1 | |
new_field = f"{field}_{i}" | |
while new_field in seen: | |
i += 1 | |
new_field = f"{field}_{i}" | |
unique_header.append(new_field) | |
else: | |
unique_header.append(field) | |
seen.add(field) | |
data = data[1:] | |
table = PrettyTable(unique_header) | |
for row in data: | |
if len(row) == len(unique_header): | |
table.add_row(row) | |
else: | |
print(f"Skipping row: {row} - Incorrect number of values") | |
print(table) | |
last_checked_and_down_right_now = soup.find_all('div', {'class': 'rightdiv'}) | |
for last_checked in last_checked_and_down_right_now: | |
teks = last_checked.text.strip() | |
substring = '''Website Status Checker Bookmarklet | |
Once added to your toolbar, this button will let you to check the status of a site from your browser's toolbar. | |
Just drag the text your bookmarks bar : Down Right Now?''' | |
if substring in teks: | |
teks2 = teks.replace(substring, "").strip() | |
print(teks2) | |
else: | |
print(teks.strip()) | |
success = True | |
except requests.exceptions.RequestException as e: | |
attempts += 1 | |
print(f"Error processing {website}: {e}. Retrying ({attempts}/{MAX_RETRY_ATTEMPTS})") | |
if not success: | |
print(f"Failed to process {website} after {MAX_RETRY_ATTEMPTS} attempts.") | |
if __name__ == "__main__":
    user = input("Enter the website name: ")
    # Reduce input to a bare domain: strip any scheme and a leading "www.".
    user = re.sub(r'^(https?://)?(www\.)?', '', user)

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:120.0) Gecko/20100101 Firefox/120.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
        'Accept-Language': 'id,en-US;q=0.7,en;q=0.3',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-User': '?1',
    }
    headers2 = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:120.0) Gecko/20100101 Firefox/120.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
        'Accept-Language': 'id,en-US;q=0.7,en;q=0.3',
        'Referer': 'https://www.isitdownrightnow.com/',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-User': '?1',
    }
    params = {
        'domain': user,
    }
    websites = [
        'https://www.isitdownrightnow.com/check.php',
        f'https://www.isitdownrightnow.com/{user}.html',
    ]

    # One thread per page so the two network round-trips overlap.
    threads = [
        threading.Thread(target=process_website,
                         args=(website, user, headers, headers2, params))
        for website in websites
    ]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()

    print("\nCPU Usage Information:")
    print(f"CPU Usage: {psutil.cpu_percent(interval=1)}%")
    print(f"CPU Count: {psutil.cpu_count()}")
    print(f"CPU Frequency: {psutil.cpu_freq()}")
    print(f"CPU Times: {psutil.cpu_times()}")
    print(f"CPU Stats: {psutil.cpu_stats()}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment