Created
November 23, 2023 05:50
-
-
Save Xnuvers007/d6cdfdda9e58a8b6ddfa1d20a434b243 to your computer and use it in GitHub Desktop.
BING Scrape
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
from html import escape
from urllib.parse import urlparse

import requests
from bs4 import BeautifulSoup
from flask import Flask, request, jsonify
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
# Flask application object; the /bing route below is registered on it.
app = Flask(__name__)
def is_valid_query_bing(query: str) -> bool:
    """Return True when *query* is acceptable as a search term.

    A query is rejected (False) only when it parses as an absolute URL,
    i.e. it has both a scheme and a network location. Anything else,
    including text that ``urlparse`` cannot parse at all, is accepted.

    Args:
        query: Raw search text supplied by the client.

    Returns:
        False if the query is an absolute URL, True otherwise.
    """
    try:
        # Strip first so that a URL padded with whitespace is still
        # recognised regardless of how the running interpreter's urlparse
        # treats leading/trailing blanks.
        parsed = urlparse(query.strip())
    except ValueError:
        # Malformed input (e.g. an unterminated IPv6 bracket) is not a
        # usable URL, so it is treated as plain search text.
        return True
    return not (parsed.scheme and parsed.netloc)
def bing_extract_results(result):
    """Extract the link and optional snippet from one Bing result node.

    Args:
        result: A parsed result element exposing ``find`` (the caller passes
            the ``li.b_algo`` nodes returned by BeautifulSoup).

    Returns:
        ``{"link": ...}`` or ``{"link": ..., "paragraph": ...}`` with every
        value HTML-escaped, or ``None`` when the node has no usable link.
    """
    anchor = result.find('a', href=True)
    if not anchor:
        return None

    href = anchor['href']
    if not href.strip():
        # An empty href carries no information; report it as "no link" so
        # the caller drops the entry instead of returning a blank result.
        return None

    extracted = {"link": escape(href)}
    paragraph = result.find('p', class_='b_lineclamp4 b_algoSlug')
    if paragraph:
        extracted["paragraph"] = escape(paragraph.text.strip())
    return extracted
@app.route('/bing', methods=['GET'])
def bing_search():
    """Handle ``GET /bing?search=<text>``: return Bing suggestions and results.

    Reads the ``search`` query-string argument, rejects it when missing or
    when ``is_valid_query_bing`` returns False, then fetches Bing's
    suggestion endpoint and its search page and scrapes both.

    Returns:
        On success, a JSON object with the keys ``query`` (HTML-escaped
        input), ``suggestions`` (list of escaped strings) and ``results``
        (list of dicts produced by ``bing_extract_results``).
        On failure, ``({"error": ...}, status)`` with status 400 for a
        missing/invalid query and 500 when either upstream request fails.
    """
    query = request.args.get('search', type=str)
    if not query:
        return jsonify({"error": "Missing 'search' parameter"}), 400
    if not is_valid_query_bing(query):
        return jsonify({"error": "Invalid Query"}), 400

    response_dict = {"query": escape(query), "suggestions": [], "results": []}

    # Pass the query through `params=` so it is URL-encoded; concatenating it
    # raw let characters such as '&', '#' or spaces corrupt the request or
    # inject extra parameters.
    suggestion_params = {
        "pt": "page.home",
        "mkt": "en-us",
        "qry": query,
        "cp": "0",
        "msbqf": "false",
        "cvid": "C41C6A7A87F04011ABDD42AE95D1E8FA",
    }
    try:
        suggestion_response = requests.get(
            "https://www.bing.com/AS/Suggestions",
            params=suggestion_params,
            timeout=(3, 10),
        )
        suggestion_response.raise_for_status()
    except requests.RequestException as e:
        return jsonify({"error": f"Suggestions Error: {escape(str(e))}"}), 500

    suggestion_soup = BeautifulSoup(suggestion_response.text, "html.parser")
    response_dict["suggestions"] = [
        escape(item.text) for item in suggestion_soup.select("li")
    ]

    # Transient failures (connection errors and the listed 5xx statuses) are
    # retried by the mounted adapter. The previous hand-written loop returned
    # on the first exception, so it never retried despite saying it would.
    max_retries = 3
    retry_policy = Retry(
        total=max_retries,
        backoff_factor=0.1,
        status_forcelist=[500, 502, 503, 504],
    )
    try:
        # The context manager closes the session on every exit path.
        with requests.Session() as session:
            session.mount('http://', HTTPAdapter(max_retries=retry_policy))
            session.mount('https://', HTTPAdapter(max_retries=retry_policy))
            search_response = session.get(
                "https://www.bing.com/search",
                params={"q": query},
                timeout=(3, 10),
            )
            search_response.raise_for_status()
    except requests.RequestException as e:
        return jsonify({"error": f"Search Error: {escape(str(e))}"}), 500

    search_soup = BeautifulSoup(search_response.text, 'html.parser')
    for result in search_soup.find_all('li', class_='b_algo'):
        extracted_result = bing_extract_results(result)
        if extracted_result:
            response_dict["results"].append(extracted_result)

    return jsonify(response_dict)
if __name__ == '__main__':
    # Flask's debug mode exposes the Werkzeug interactive debugger, which
    # lets anyone who can reach the server execute arbitrary Python. Keep it
    # off unless the developer opts in explicitly via FLASK_DEBUG.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(debug=debug_enabled)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment