@Xnuvers007
Last active August 20, 2024 16:26
Scraping www.isitdownrightnow.com to check whether a website is down or up.
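The two scripts below pull in a few third-party packages. A minimal setup sketch, assuming Python 3 and pip, with package names inferred from the imports (Flask, requests, BeautifulSoup via beautifulsoup4, PrettyTable, psutil):

pip install flask requests beautifulsoup4 prettytable psutil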
from flask import Flask, request, jsonify
import requests
from requests.exceptions import RequestException
import time
app = Flask(__name__)

def normalize_url(url):
    # Add 'http://' if no scheme is present
    if not url.startswith(('http://', 'https://')):
        return f'http://{url}'
    return url

def check_url_status(url):
    normalized_url = normalize_url(url)
    start_time = time.time()
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36'}
    # Try HTTPS first if the URL was normalized to "http://"
    if normalized_url.startswith("http://"):
        https_url = normalized_url.replace("http://", "https://", 1)
        try:
            response = requests.get(https_url, headers=headers)
            elapsed_time = time.time() - start_time
            return response, elapsed_time, https_url
        except RequestException:
            pass
    # Fall back to HTTP if HTTPS failed
    try:
        response = requests.get(normalized_url, headers=headers)
        elapsed_time = time.time() - start_time
        return response, elapsed_time, normalized_url
    except RequestException:
        pass
    # Return None if both HTTPS and HTTP failed
    return None, None, None

@app.route('/check-url', methods=['GET'])
def check_url():
    url = request.args.get('url')
    if not url:
        return jsonify({
            'status': 0,
            'statusText': 'URL parameter is missing',
            'isUP': False,
            'isDown': True,
            'responseTime': None,
            'redirects': None,
            'headers': None,
            'zbody': None,
            'url': None
        }), 400
    try:
        response, response_time, final_url = check_url_status(url)
        if response:
            redirects = response.history if response.history else []
            redirect_urls = [r.url for r in redirects] + [response.url]
            return jsonify({
                'status': response.status_code,
                'statusText': response.reason,
                'isUP': response.status_code == 200,
                'isDown': response.status_code != 200,
                'responseTime': response_time,
                'redirects': redirect_urls if redirects else None,
                'headers': dict(response.headers),
                'zbody': response.text,
                'url': final_url
            }), 200
        else:
            return jsonify({
                'status': 0,
                'statusText': 'Both HTTP and HTTPS requests failed',
                'isUP': False,
                'isDown': True,
                'responseTime': None,
                'redirects': None,
                'headers': None,
                'zbody': None,
                'url': normalize_url(url)
            }), 200
    except Exception as e:
        return jsonify({
            'status': 0,
            'statusText': str(e),
            'isUP': False,
            'isDown': True,
            'responseTime': None,
            'redirects': None,
            'headers': None,
            'zbody': None,
            'url': normalize_url(url)
        }), 500

if __name__ == '__main__':
    app.run(debug=False)
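
A quick way to exercise the endpoint above once the Flask app is running. A minimal sketch, assuming the default local address 127.0.0.1:5000 and using example.com purely as an illustrative target:

import requests

# Query the /check-url endpoint defined above; 'url' is the only expected query parameter
resp = requests.get('http://127.0.0.1:5000/check-url', params={'url': 'example.com'})
data = resp.json()
print(data['status'], data['isUP'], data['responseTime'])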

# Second script: command-line checker that scrapes www.isitdownrightnow.com directly.
import re
import threading

import psutil
import requests
from bs4 import BeautifulSoup
from prettytable import PrettyTable

MAX_RETRY_ATTEMPTS = 5

def process_website(website, user, headers, headers2, params):
    # Note: user and headers2 are accepted to mirror the call site, but only headers/params are used here.
    attempts = 0
    success = False
    while attempts < MAX_RETRY_ATTEMPTS and not success:
        try:
            response = requests.get(website, headers=headers, params=params, timeout=25)
            response.raise_for_status()  # Raises an HTTPError for bad responses (4xx or 5xx)
            soup = BeautifulSoup(response.text, 'html.parser')

            if website == 'https://www.isitdownrightnow.com/check.php':
                # check.php reports its result in a series of <span class="tab"> elements
                tabs = soup.find_all('span', class_='tab')
                website_name = tabs[0].text
                url_checked = tabs[1].text
                response_time = tabs[2].text
                last_down = tabs[3].text
                # The "up" result lives in <div class="statusup">; a "statusdown" class for the down case is an assumption
                status_div = soup.find('div', class_='statusup') or soup.find('div', class_='statusdown')
                status = status_div.text if status_div else 'UNKNOWN'

                # Print the extracted information
                print(f"Website Name: {website_name}")
                print(f"URL Checked: {url_checked}")
                print(f"Response Time: {response_time}")
                print(f"Last Down: {last_down}")
                print(f"Status: {status}")
            else:
                # The <domain>.html page contains a history table; rebuild it as a PrettyTable
                html_table = soup.find('table', {'width': '100%', 'cellspacing': '1', 'cellpadding': '1', 'border': '0'})
                rows = html_table.find_all('tr')
                data = []
                for row in rows:
                    cols = row.find_all(['td', 'th'])
                    cols = [col.text.strip() for col in cols]
                    data.append(cols)

                # De-duplicate header names so PrettyTable accepts them as field names
                header = data[0]
                unique_header = []
                seen = set()
                for field in header:
                    if field in seen:
                        i = 1
                        new_field = f"{field}_{i}"
                        while new_field in seen:
                            i += 1
                            new_field = f"{field}_{i}"
                        unique_header.append(new_field)
                        seen.add(new_field)  # remember the suffixed name so later duplicates get a fresh suffix
                    else:
                        unique_header.append(field)
                        seen.add(field)

                data = data[1:]
                pretty = PrettyTable(unique_header)
                for row in data:
                    if len(row) == len(unique_header):
                        pretty.add_row(row)
                    else:
                        print(f"Skipping row: {row} - Incorrect number of values")
                print(pretty)

                # Print the "last checked" sidebar, minus the bookmarklet blurb
                last_checked_and_down_right_now = soup.find_all('div', {'class': 'rightdiv'})
                for last_checked in last_checked_and_down_right_now:
                    teks = last_checked.text.strip()
                    substring = '''Website Status Checker Bookmarklet
Once added to your toolbar, this button will let you to check the status of a site from your browser's toolbar.
Just drag the text your bookmarks bar : Down Right Now?'''
                    if substring in teks:
                        print(teks.replace(substring, "").strip())
                    else:
                        print(teks)
            success = True
        except requests.exceptions.RequestException as e:
            attempts += 1
            print(f"Error processing {website}: {e}. Retrying ({attempts}/{MAX_RETRY_ATTEMPTS})")
    if not success:
        print(f"Failed to process {website} after {MAX_RETRY_ATTEMPTS} attempts.")
if __name__ == "__main__":
user = str(input("Enter the website name: "))
# remove https:// http:// and subdomain example www. just domain like google.com
user = re.sub(r'^(https?://)?(www\.)?', '', user)
# if user.startswith('http://'):
# user = user.replace('http://', '')
# elif user.startswith('https://'):
# user = user.replace('https://', '')
# elif user.startswith('www.'):
# user = user.replace('www.', '')
# elif user.startswith('http://www.'):
# user = user.replace('http://www.', '')
# elif user.startswith('https://www.'):
# user = user.replace('https://www.', '')
# else:
# pass
# user = urlparse(user).netloc
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:120.0) Gecko/20100101 Firefox/120.0',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
'Accept-Language': 'id,en-US;q=0.7,en;q=0.3',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1',
'Sec-Fetch-Dest': 'document',
'Sec-Fetch-Mode': 'navigate',
'Sec-Fetch-Site': 'none',
'Sec-Fetch-User': '?1',
}
headers2 = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:120.0) Gecko/20100101 Firefox/120.0',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
'Accept-Language': 'id,en-US;q=0.7,en;q=0.3',
'Referer': 'https://www.isitdownrightnow.com/',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1',
'Sec-Fetch-Dest': 'document',
'Sec-Fetch-Mode': 'navigate',
'Sec-Fetch-Site': 'same-origin',
'Sec-Fetch-User': '?1',
}
params = {
'domain': user,
}
websites = ['https://www.isitdownrightnow.com/check.php',
f'https://www.isitdownrightnow.com/{user}.html']
threads = []
for website in websites:
thread = threading.Thread(target=process_website, args=(website, user, headers, headers2, params))
thread.start()
threads.append(thread)
for thread in threads:
thread.join()
print("\nCPU Usage Information:")
print(f"CPU Usage: {psutil.cpu_percent(interval=1)}%")
print(f"CPU Count: {psutil.cpu_count()}")
print(f"CPU Frequency: {psutil.cpu_freq()}")
print(f"CPU Times: {psutil.cpu_times()}")
print(f"CPU Stats: {psutil.cpu_stats()}")