KorigamiK · September 14, 2021 19:55
diff --git a/Jee advance papers VMC.py b/Jee advance papers VMC.py
 import re
 from aiohttp import ClientSession as c
 import asyncio
 from bs4 import BeautifulSoup as bs
 import json

 # URL pattern for a year's page; "{}" receives the two-digit year.
 # main() uses this form for years up to and including 17.
 template = "https://www.vidyamandir.com/jeeadvanced20{}.html"
 # Alternative URL pattern; main() uses this form for years after 17.
 template_2 = "https://www.vidyamandir.com/jeeadv20{}.html"
 # Matches whitespace, "for", any run of characters, "dvanced" and whitespace,
 # but only when a digit follows.  main() replaces the match with one space
 # to shorten the row titles.
 unnecessary = re.compile(r"\sfor.+dvanced\s(?=\d)")


 async def main(download_folder: str = "D:\\Documents\\Solutions\\pyq\\adv") -> None:
     """Scrape the yearly paper pages and write ``data.json`` and ``data.crawljob``.

     The pages for years 15 through 20 are fetched concurrently.  Every table
     row that has both a cell and a link with an ``href`` contributes one
     ``title -> href`` entry to that year's mapping; years that yield no
     entries are left out.

     ``data.json`` receives the ``{year: {title: href}}`` mapping and
     ``data.crawljob`` receives a flat JSON list with one download entry per
     link.

     Args:
         download_folder: Value stored under ``"downloadFolder"`` in every
             crawljob entry.  The default keeps the Windows path this script
             was written with.
     """
     data = {}

     async with c() as session:

         async def get_link(url: str, year: int) -> None:
             """Fetch one year's page and store its links in ``data[year]``."""
             async with session.get(url) as resp:
                 html = await resp.text()

             # Parsing does not need the connection, so it happens after release.
             soup = bs(html, "html.parser")
             links = {}
             for tr in soup.find_all("tr"):
                 cell = tr.find("td")
                 anchor = tr.find("a")
                 # Rows without a cell or without a link carry no paper.
                 if cell is None or anchor is None:
                     continue
                 href = anchor.get("href")
                 # An anchor without a target would yield an unusable entry.
                 if not href:
                     continue
                 name = (
                     unnecessary.sub(" ", cell.text)
                     .strip()
                     .replace("Detailed Solutions and Questions", "Solutions")
                 )
                 links["All " + name if "utions" in name else name] = href

             # Only record years for which at least one link was found.
             if links:
                 data[year] = links

         await asyncio.gather(
             *(
                 get_link((template if year <= 17 else template_2).format(year), year)
                 for year in range(15, 21)
             )
         )

     # Requests finish in arbitrary order; sort by year so both output files
     # are identical from run to run.
     data = dict(sorted(data.items()))

     with open("data.json", "w") as file:
         json.dump(data, file, indent=4)

     new_data = [
         {
             "text": link,
             "filename": f"{name}.pdf",
             # Optional entry fields, currently disabled:
             # "autoConfirm": "TRUE",
             # "autoStart": "TRUE",
             "downloadFolder": download_folder,
         }
         for links in data.values()
         for name, link in links.items()
     ]
     with open("data.crawljob", "w") as file:
         json.dump(new_data, file)



 # asyncio.run() creates a fresh event loop, runs main() to completion and
 # closes the loop; calling get_event_loop() with no running loop is deprecated.
 asyncio.run(main())
	import re
	from aiohttp import ClientSession as c
	import asyncio
	from bs4 import BeautifulSoup as bs
	import json

	# URL pattern for a year's page; "{}" receives the two-digit year.
	# main() uses this form for years up to and including 17.
	template = "https://www.vidyamandir.com/jeeadvanced20{}.html"
	# Alternative URL pattern; main() uses this form for years after 17.
	template_2 = "https://www.vidyamandir.com/jeeadv20{}.html"
	# Matches whitespace, "for", any run of characters, "dvanced" and whitespace,
	# but only when a digit follows.  main() replaces the match with one space
	# to shorten the row titles.
	unnecessary = re.compile(r"\sfor.+dvanced\s(?=\d)")


	async def main(download_folder: str = "D:\\Documents\\Solutions\\pyq\\adv") -> None:
	    """Scrape the yearly paper pages and write ``data.json`` and ``data.crawljob``.

	    The pages for years 15 through 20 are fetched concurrently.  Every table
	    row that has both a cell and a link with an ``href`` contributes one
	    ``title -> href`` entry to that year's mapping; years that yield no
	    entries are left out.

	    ``data.json`` receives the ``{year: {title: href}}`` mapping and
	    ``data.crawljob`` receives a flat JSON list with one download entry per
	    link.

	    Args:
	        download_folder: Value stored under ``"downloadFolder"`` in every
	            crawljob entry.  The default keeps the Windows path this script
	            was written with.
	    """
	    data = {}

	    async with c() as session:

	        async def get_link(url: str, year: int) -> None:
	            """Fetch one year's page and store its links in ``data[year]``."""
	            async with session.get(url) as resp:
	                html = await resp.text()

	            # Parsing does not need the connection, so it happens after release.
	            soup = bs(html, "html.parser")
	            links = {}
	            for tr in soup.find_all("tr"):
	                cell = tr.find("td")
	                anchor = tr.find("a")
	                # Rows without a cell or without a link carry no paper.
	                if cell is None or anchor is None:
	                    continue
	                href = anchor.get("href")
	                # An anchor without a target would yield an unusable entry.
	                if not href:
	                    continue
	                name = (
	                    unnecessary.sub(" ", cell.text)
	                    .strip()
	                    .replace("Detailed Solutions and Questions", "Solutions")
	                )
	                links["All " + name if "utions" in name else name] = href

	            # Only record years for which at least one link was found.
	            if links:
	                data[year] = links

	        await asyncio.gather(
	            *(
	                get_link((template if year <= 17 else template_2).format(year), year)
	                for year in range(15, 21)
	            )
	        )

	    # Requests finish in arbitrary order; sort by year so both output files
	    # are identical from run to run.
	    data = dict(sorted(data.items()))

	    with open("data.json", "w") as file:
	        json.dump(data, file, indent=4)

	    new_data = [
	        {
	            "text": link,
	            "filename": f"{name}.pdf",
	            # Optional entry fields, currently disabled:
	            # "autoConfirm": "TRUE",
	            # "autoStart": "TRUE",
	            "downloadFolder": download_folder,
	        }
	        for links in data.values()
	        for name, link in links.items()
	    ]
	    with open("data.crawljob", "w") as file:
	        json.dump(new_data, file)



	# asyncio.run() creates a fresh event loop, runs main() to completion and
	# closes the loop; calling get_event_loop() with no running loop is deprecated.
	asyncio.run(main())