Skip to content

Instantly share code, notes, and snippets.

@Xnuvers007
Created November 23, 2023 05:50
Show Gist options
  • Save Xnuvers007/d6cdfdda9e58a8b6ddfa1d20a434b243 to your computer and use it in GitHub Desktop.
Save Xnuvers007/d6cdfdda9e58a8b6ddfa1d20a434b243 to your computer and use it in GitHub Desktop.
BING Scrape
from flask import Flask, request, jsonify
import requests
from bs4 import BeautifulSoup
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from urllib.parse import urlparse
from html import escape
# Flask application object; the `/bing` route handler in this file is registered on it.
app = Flask(__name__)
def is_valid_query_bing(query):
    """Return True when *query* is acceptable as a Bing search term.

    A query is rejected (False) only when it parses as an absolute URL,
    i.e. ``urlparse`` finds both a scheme and a network location in it.
    Plain words, bare host names and the empty string are all accepted.

    Args:
        query: The raw search text supplied by the caller.

    Returns:
        bool: False for URL-shaped input, True otherwise.
    """
    try:
        parsed = urlparse(query)
    except ValueError:
        # urlparse raises on malformed authority sections (e.g. an unclosed
        # IPv6 bracket); such text is not a usable URL, so treat it as a query.
        return True
    return not (parsed.scheme and parsed.netloc)
def bing_extract_results(result):
    """Pull the link and optional snippet out of one Bing result element.

    Args:
        result: A parsed result element exposing a BeautifulSoup-style
            ``find(name, **attrs)`` method.

    Returns:
        dict | None: ``{"link": ...}`` with an extra ``"paragraph"`` key when
        a ``<p class="b_lineclamp4 b_algoSlug">`` snippet is present, or None
        when the element contains no ``<a href=...>``. Both values are passed
        through ``html.escape``.
    """
    anchor = result.find('a', href=True)
    snippet = result.find('p', class_='b_lineclamp4 b_algoSlug')
    if not anchor:
        # Without a target URL the entry is useless to the caller.
        return None

    extracted = {"link": escape(anchor['href'])}
    if snippet:
        extracted["paragraph"] = escape(snippet.text.strip())
    return extracted
def _bing_fetch_suggestions(query):
    """Fetch Bing autosuggest entries for *query*.

    The query travels in ``params`` so that requests URL-encodes it; building
    the URL by string concatenation let characters such as ``&`` or ``#``
    truncate the query or inject extra parameters.

    Returns:
        list[str]: HTML-escaped text of every ``<li>`` in the response body.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
    """
    response = requests.get(
        "https://www.bing.com/AS/Suggestions",
        params={
            "pt": "page.home",
            "mkt": "en-us",
            "qry": query,
            "cp": "0",
            "msbqf": "false",
            "cvid": "C41C6A7A87F04011ABDD42AE95D1E8FA",
        },
        timeout=(3, 10),
    )
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    return [escape(item.text) for item in soup.select("li")]


def _bing_fetch_results(query, max_retries=3):
    """Fetch the Bing result page for *query* and extract its entries.

    Retrying is delegated to urllib3's ``Retry`` mounted on the session
    adapters (connection errors and 500/502/503/504 responses).

    Returns:
        list[dict]: One dict per ``<li class="b_algo">`` element for which
        ``bing_extract_results`` returned a value.

    Raises:
        requests.RequestException: When the request still fails after the
            adapter-level retries are exhausted.
    """
    retry_policy = Retry(
        total=max_retries,
        backoff_factor=0.1,
        status_forcelist=[500, 502, 503, 504],
    )
    # The context manager closes the session on every exit path.
    with requests.Session() as session:
        session.mount('http://', HTTPAdapter(max_retries=retry_policy))
        session.mount('https://', HTTPAdapter(max_retries=retry_policy))
        response = session.get(
            "https://www.bing.com/search",
            params={"q": query},
            timeout=(3, 10),
        )
        response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    extracted = (
        bing_extract_results(item)
        for item in soup.find_all('li', class_='b_algo')
    )
    return [entry for entry in extracted if entry]


@app.route('/bing', methods=['GET'])
def bing_search():
    """Handle ``GET /bing?search=<text>``.

    Returns a JSON object with the escaped ``query``, a list of
    ``suggestions`` and a list of ``results``. Responds with 400 when the
    ``search`` parameter is missing or rejected by ``is_valid_query_bing``,
    and with 500 when either upstream request to Bing fails.
    """
    query = request.args.get('search', type=str)
    if not query:
        return jsonify({"error": "Missing 'search' parameter"}), 400
    if not is_valid_query_bing(query):
        return jsonify({"error": "Invalid Query"}), 400

    try:
        suggestions = _bing_fetch_suggestions(query)
    except requests.RequestException as e:
        return jsonify({"error": f"Suggestions Error: {escape(str(e))}"}), 500

    try:
        results = _bing_fetch_results(query)
    except requests.RequestException as e:
        # Reported only after the adapter-level retries have given up, so the
        # message no longer claims that another attempt is coming.
        return jsonify({"error": f"Search Error: {escape(str(e))}"}), 500

    return jsonify({
        "query": escape(query),
        "suggestions": suggestions,
        "results": results,
    })
# Script entry point: start Flask's built-in development server when the
# file is executed directly.
if __name__ == '__main__':
    # NOTE(review): debug=True enables the interactive debugger and reloader;
    # confirm this is never used for a publicly reachable deployment.
    app.run(debug=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment