Put the links of all pages to download from in visionpre.txt (one link per line),
then run the following command:
python iasmat.py --lfile visionpre.txt
| import functools | |
| # from termcolor import colored | |
def debug(func):
    """Decorate *func* so each call prints its signature and its return value.

    The wrapped function's result is passed through unchanged.
    """
    @functools.wraps(func)
    def wrapper_debug(*args, **kwargs):
        # Positional arguments come first, followed by name=value pairs.
        rendered = [repr(arg) for arg in args]
        rendered.extend(f"{name}={val!r}" for name, val in kwargs.items())
        call_text = ", ".join(rendered)
        print(f"\nCalling {func.__name__}({call_text})")
        result = func(*args, **kwargs)
        print(f"{func.__name__!r} returned {result!r}")
        return result
    return wrapper_debug
| import requests | |
| from bs4 import BeautifulSoup | |
| import csv | |
| import fire | |
| import re | |
| from termcolor import colored | |
| from req import getSoupFromURL | |
| from debugit import debug | |
| #################################### | |
| # handle google drive | |
| #################################### | |
@debug
def download_file_from_google_drive(id, destination):
    """Download a Google Drive file and save it to a local path.

    A first request is sent for the file. If the response carries a
    download-warning cookie (see ``get_confirm_token``), the request is
    repeated with that cookie's value as the ``confirm`` parameter.

    :param str id: Google Drive file id.
    :param str destination: path of the file to write.
    :return: None
    """
    URL = "https://docs.google.com/uc?export=download"

    # Use the session as a context manager so its pooled connections
    # (including the one held by the first streamed response) are released
    # even when the download fails part-way.
    with requests.Session() as session:
        response = session.get(URL, params={'id': id}, stream=True)
        token = get_confirm_token(response)
        if token:
            params = {'id': id, 'confirm': token}
            response = session.get(URL, params=params, stream=True)
        print("saving file... " + destination)
        # The body is streamed, so it must be consumed before the session closes.
        save_response_content(response, destination)
def get_confirm_token(response):
    """Return the value of the first 'download_warning*' cookie, or None.

    :param response: object exposing a ``cookies`` mapping with ``items()``.
    :return: the cookie value, or None when no such cookie is present.
    """
    matching_values = (
        cookie_value
        for cookie_name, cookie_value in response.cookies.items()
        if cookie_name.startswith('download_warning')
    )
    return next(matching_values, None)
def save_response_content(response, destination):
    """Write the body of *response* to the file at *destination*.

    The body is read through ``response.iter_content`` in 32768-byte chunks
    and written in binary mode.
    """
    chunk_size = 32768
    with open(destination, "wb") as out_file:
        # Empty chunks are keep-alive markers rather than payload; skip them.
        out_file.writelines(
            piece for piece in response.iter_content(chunk_size) if piece
        )
| #################################### | |
| # helpers | |
| #################################### | |
@debug
def getFilename(link):
    """Derive a file name stem from a page link.

    Everything after the first ``"test-"`` in *link* is kept, ``"-"`` is
    turned into ``"_"`` and ``"/"`` is removed, e.g.
    ``".../prelims-2019-test-31-with-solutions/"`` -> ``"31_with_solutions"``.

    :param str link: page URL containing ``"test-"``.
    :rtype: str
    :raises IndexError: if *link* does not contain ``"test-"``.
    """
    # Split on the FIRST "test-" only. Slugs such as
    # ".../test-30-with-solutions-full-test-6/" contain it twice, and an
    # unbounded split would silently drop everything after the second one.
    tail = link.split("test-", 1)[1]
    rep = {"-": "_", "/": ""}  # define desired replacements here
    return multireplace(tail, rep)
def multireplace(string, replacements):
    """
    https://gist.github.com/bgusach/a967e0587d6e01e889fd1d776c5f3729
    Given a string and a replacement map, it returns the replaced string.
    All replacements happen in a single pass, so the output of one
    replacement is never re-scanned by another.
    :param str string: string to execute replacements on
    :param dict replacements: replacement dictionary {value to find: value to replace}
    :rtype: str
    """
    # With no replacements the joined pattern would be the empty regex, which
    # matches at every position and then fails the lookup with KeyError('').
    if not replacements:
        return string
    # Place longer ones first to keep shorter substrings from matching where the longer ones should take place
    # For instance given the replacements {'ab': 'AB', 'abc': 'ABC'} against the string 'hey abc', it should produce
    # 'hey ABC' and not 'hey ABc'
    substrs = sorted(replacements, key=len, reverse=True)
    # Create a big OR regex that matches any of the substrings to replace
    regexp = re.compile('|'.join(map(re.escape, substrs)))
    # For each match, look up the new string in the replacements
    return regexp.sub(lambda match: replacements[match.group(0)], string)
def getDriveLinkfromSoup(soup):
    """Return ``[paper, solution]`` hrefs of the Google Drive links in *soup*.

    The first matching anchor is the paper and the second, when present, is
    the solution; ``solution`` is None when only one link is found.

    :raises IndexError: if the page has no Google Drive link at all.
    """
    anchors = soup.find_all(
        'a', {'href': re.compile(r'drive\.google\.com/')})
    paper = anchors[0]['href']
    # Some pages carry only a single link.
    solution = anchors[1]['href'] if len(anchors) > 1 else None
    return [paper, solution]
def getGfileid(l):
    """Extract the Google Drive file id from each URL in *l*.

    Both ``...?id=<id>`` and ``.../d/<id>/...`` link shapes are accepted.
    Only the id token itself is returned: anything after it (further query
    parameters, a path segment, a fragment) is dropped, so a link such as
    ``uc?id=ABC&export=download`` yields ``"ABC"``.

    :param l: iterable of URL strings.
    :return: list of ids in input order, or None when *l* is empty.
    :raises IndexError: if a URL contains no recognisable file id.
    """
    ids = []
    for url in l:
        # The id ends at the first URL delimiter following the marker.
        match = re.search(r"(?:id=|/d/)([^/?&#]+)", url)
        if match is None:
            # IndexError is what the plain split()[1] lookup used to raise.
            raise IndexError(f"no Google Drive file id found in {url!r}")
        ids.append(match.group(1))
    return ids or None
@debug
def readLinks(fname):
    """Read the links listed in *fname*, one per line.

    Surrounding whitespace (including the trailing newline) is stripped from
    every line. Blank lines are skipped so they are not handed on as empty
    URLs.

    :param str fname: path of the text file to read.
    :return: list of non-empty, stripped lines in file order.
    """
    with open(fname) as f:
        stripped = (line.strip() for line in f)
        return [link for link in stripped if link]
def download_allGdrive(ids, fnames):
    """Download every Drive file in *ids* to the matching path in *fnames*.

    :param ids: sequence of Google Drive file ids.
    :param fnames: sequence of destination paths, parallel to *ids*.
    :return: list with one download result per pair, or None when the two
        sequences differ in length (nothing is downloaded in that case).
    """
    if len(ids) != len(fnames):
        return None
    results = []
    for file_id, target in zip(ids, fnames):
        results.append(download_file_from_google_drive(file_id, target))
    return results
| #################################### | |
| # main functions | |
| #################################### | |
def downloadit(lfile):
    """Download the paper (and solution, when present) for every listed page.

    For each link read from *lfile* the page is fetched, its Google Drive
    links are located, and the files are saved as ``<name>.pdf`` and
    ``<name> Solution.pdf``, where ``<name>`` comes from ``getFilename``.

    :param str lfile: path of the text file holding one page link per line.
    """
    links = readLinks(lfile)
    print("\n" + str(links))
    for page_url in links:
        paper_url, solution_url = getDriveLinkfromSoup(getSoupFromURL(page_url))
        base_name = getFilename(page_url)
        paper_file = base_name + ".pdf"
        solution_file = base_name + " Solution" + ".pdf"
        # Fetch the solution only when the page provides both links.
        if paper_url and solution_url:
            urls, targets = [paper_url, solution_url], [paper_file, solution_file]
        else:
            urls, targets = [paper_url], [paper_file]
        download_allGdrive(getGfileid(urls), targets)
| # def downloadit(l): | |
| # soup = getSoupFromURL(l) | |
| # [p, s] = getDriveLinkfromSoup(soup) | |
| # [p_id, s_id] = getGfileid([p, s]) | |
| # p_f = getFilename(l) | |
| # s_f = p_f + " Solution" | |
| # [pfname, sfname] = [p + ".pdf" for p in [p_f, s_f]] | |
| # if p_id: | |
| # download_file_from_google_drive(p_id, pfname) | |
| # if s_id: | |
| # download_file_from_google_drive(s_id, sfname) | |
| #################################### | |
| # set everything to Fire | |
| #################################### | |
| # When run as a script, hand downloadit to fire so its lfile parameter becomes the --lfile command-line flag | |
| if __name__ == "__main__": | |
| fire.Fire(downloadit) |
| import warnings | |
| import contextlib | |
| import csv | |
| from bs4 import BeautifulSoup | |
| import requests | |
| from urllib3.exceptions import InsecureRequestWarning | |
| from termcolor import colored | |
# Reference to the unpatched method, used both to delegate to it and to
# restore it when the context manager exits.
old_merge_environment_settings = requests.Session.merge_environment_settings


@contextlib.contextmanager
def no_ssl_verification():
    """Context manager that disables TLS certificate verification in requests.

    While active, ``requests.Session.merge_environment_settings`` is replaced
    by a wrapper that forces ``verify=False``, and ``InsecureRequestWarning``
    is silenced. On exit the original method is restored and every adapter
    used in the meantime is closed.
    """
    opened_adapters = set()

    def merge_environment_settings(self, url, proxies, stream, verify, cert):
        # Verification happens only once per connection so we need to close
        # all the opened adapters once we're done. Otherwise, the effects of
        # verify=False persist beyond the end of this context manager.
        opened_adapters.add(self.get_adapter(url))
        settings = old_merge_environment_settings(
            self, url, proxies, stream, verify, cert)
        settings['verify'] = False
        return settings

    requests.Session.merge_environment_settings = merge_environment_settings
    try:
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', InsecureRequestWarning)
            yield
    finally:
        requests.Session.merge_environment_settings = old_merge_environment_settings
        for adapter in opened_adapters:
            try:
                adapter.close()
            except Exception:
                # Closing is best-effort cleanup: one failing adapter must not
                # stop the others from being closed. Only Exception is caught
                # so KeyboardInterrupt / SystemExit still propagate.
                pass
def getSoupFromURL(url, parser="lxml"):
    """Fetch *url* and return its body parsed as a BeautifulSoup object.

    SSL warnings are suppressed for the duration of the request.

    :param str url: address of the page to fetch.
    :param str parser: parser name handed to BeautifulSoup; defaults to
        ``"lxml"`` (``"html5lib"`` is an alternative if it is installed).
    :return: the parsed document.
    """
    # NOTE(security): certificate verification is deliberately turned off
    # here, so the identity of the remote server is not checked.
    with no_ssl_verification():
        getme = requests.get(url, verify=False)
    return BeautifulSoup(getme.content, parser)
| https://iasmaterials.com/vision-ias-prelims-2019-test-31-with-solutions/ | |
| https://iasmaterials.com/vision-ias-prelims-2019-abhyaas-test-1-with-solutions/ | |
| https://iasmaterials.com/vision-ias-prelims-2019-test-30-with-solutions-full-test-6/ | |
| https://iasmaterials.com/vision-ias-prelims-2019-test-29-with-solutions/ | |
| https://iasmaterials.com/vision-ias-prelims-2019-test-28-with-solutions/ | |
| https://iasmaterials.com/vision-ias-prelims-2019-test-27-with-solutions-full-test-4/ | |
| https://iasmaterials.com/vision-ias-prelims-2019-test-26-with-solutions-full-test-3/ | |
| https://iasmaterials.com/vision-ias-prelims-2019-test-25-with-solutions-full-test-2/ | |
| https://iasmaterials.com/vision-ias-prelims-2019-test-24-with-solutions-full-test-1/ | |
| https://iasmaterials.com/vision-ias-prelims-2019-test-23-with-solutions/ | |
| https://iasmaterials.com/vision-ias-prelims-2019-test-22-with-solutions/ | |
| https://iasmaterials.com/vision-ias-prelims-2019-test-21-with-solutions/ | |
| https://iasmaterials.com/vision-ias-prelims-2019-test-20-with-solutions/ | |
| https://iasmaterials.com/vision-ias-prelims-2019-test-19-with-solutions/ | |
| https://iasmaterials.com/vision-ias-prelims-2019-test-18-with-solutions/ | |
| https://iasmaterials.com/vision-ias-prelims-2019-test-17-with-solutions/ | |
| https://iasmaterials.com/vision-ias-prelims-2019-test-16-with-solutions/ | |
| https://iasmaterials.com/vision-ias-prelims-2019-test-15-with-solutions/ | |
| https://iasmaterials.com/vision-ias-prelims-2019-test-14-with-solutions/ | |
| https://iasmaterials.com/vision-ias-prelims-2019-test-13-with-solutions/ | |
| https://iasmaterials.com/vision-ias-prelims-2019-test-12-with-solutions/ | |
| https://iasmaterials.com/vision-ias-prelims-2019-test-11-with-solutions/ | |
| https://iasmaterials.com/vision-ias-prelims-2019-test-10-solution-available-for-download/ | |
| https://iasmaterials.com/vision-ias-prelims-2019-test-10-with-solutions/ | |
| https://iasmaterials.com/vision-ias-prelims-2019-test-9-with-solutions/ | |
| https://iasmaterials.com/vision-ias-prelims-2019-test-8-with-solutions/ | |
| https://iasmaterials.com/vision-ias-prelims-2019-test-7-with-solutions/ | |
| https://iasmaterials.com/vision-ias-prelims-2019-test-6-with-solutions/ | |
| https://iasmaterials.com/vision-ias-prelims-2019-test-5-with-solutions/ | |
| https://iasmaterials.com/vision-ias-prelims-2019-test-4-with-solutions/ | |
| https://iasmaterials.com/vision-ias-prelims-2019-test-3-with-solutions/ | |
| https://iasmaterials.com/vision-ias-prelims-2019-test-2-with-solutions/ | |
| https://iasmaterials.com/vision-ias-prelims-2019-test-1-with-solutions/ |