This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
import requests | |
from bs4 import BeautifulSoup | |
from concurrent.futures import ThreadPoolExecutor | |
import json | |
from requests.exceptions import InvalidURL,TooManyRedirects | |
from requests import ReadTimeout, ConnectTimeout, HTTPError, Timeout, ConnectionError | |
def get_html_array(js_url): | |
response = requests.get(js_url) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_html_array(js_url, timeout=30):
    r"""Download a JavaScript file and return the array assigned to ``var html``.

    The script is expected to contain a statement shaped like
    ``var html = [ ... ].join('\n')``. The text between ``var html = `` and
    the ``.join('\n')`` call is parsed as JSON.

    Args:
        js_url: URL of the JavaScript file to download.
        timeout: Seconds to wait for the server; passed to ``requests.get``
            so that a stalled connection cannot block the caller forever.

    Returns:
        The value parsed from the array literal (a ``list`` when the literal
        is a JSON array).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If either marker is missing, or if the text between the
            markers is not valid JSON (``json.JSONDecodeError`` is a
            ``ValueError`` subclass).
    """
    response = requests.get(js_url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of searching an error page for markers.
    response.raise_for_status()
    return _extract_html_array(response.text)


def _extract_html_array(js_source):
    r"""Parse the array literal between ``var html = `` and ``.join('\n')``."""
    start_marker = "var html = "
    start = js_source.find(start_marker)
    if start == -1:
        raise ValueError("'var html = ' not found in the JavaScript source")
    # Derive the offset from the marker itself rather than a magic number.
    start += len(start_marker)

    # JavaScript source spells the separator as a backslash followed by 'n',
    # so look for that first (raw string). The second candidate, containing a
    # real newline character, is the marker the previous implementation
    # searched for and is kept as a fallback.
    for end_marker in (r".join('\n')", ".join('\n')"):
        end = js_source.find(end_marker, start)
        if end != -1:
            break
    else:
        raise ValueError("'.join(...)' terminator not found after 'var html = '")

    # json.loads needs the text itself (surrounding whitespace is tolerated),
    # not a list of character positions.
    return json.loads(js_source[start:end])