Skip to content

Instantly share code, notes, and snippets.

@KorigamiK
Last active September 14, 2021 19:55
Show Gist options
  • Save KorigamiK/d363fbcfd0c25b6df9d267672996aa62 to your computer and use it in GitHub Desktop.
Save KorigamiK/d363fbcfd0c25b6df9d267672996aa62 to your computer and use it in GitHub Desktop.
Downloads JEE Advanced paper solutions from VMC and also creates a .crawljob file for JDownloader
import re
from aiohttp import ClientSession as c
import asyncio
from bs4 import BeautifulSoup as bs
import json
# URL patterns for VMC's JEE Advanced year pages; "{}" takes the two-digit
# year. Years up to 2017 use the first scheme, later years the second
# (see the `i <= 17` switch in main()).
template = "https://www.vidyamandir.com/jeeadvanced20{}.html"
template_2 = "https://www.vidyamandir.com/jeeadv20{}.html"
# Matches boilerplate of the form " for ... dvanced " that directly precedes a
# digit, so it can be stripped from the scraped row labels before use.
unnecessary = re.compile(r"\sfor.+dvanced\s(?=\d)")
async def main():
    """Scrape VMC's JEE Advanced pages (2015-2020) for paper/solution links.

    Writes two files:
      * ``data.json``      - ``{year: {paper name: url}}`` for every year
        whose page yielded at least one link.
      * ``data.crawljob``  - a JDownloader crawljob entry per PDF, pointing
        downloads at a fixed local folder.
    """
    data = {}
    async with c() as session:

        async def get_link(url: str, year: int) -> None:
            """Fetch one year's page and record its PDF links in ``data``."""
            async with session.get(url) as resp:
                soup = bs(await resp.text(), "html.parser")
                data[year] = row = {}
                for tr in soup.find_all("tr"):
                    try:
                        name = unnecessary.sub(" ", tr.find("td").text).strip().replace('Detailed Solutions and Questions', 'Solutions')
                        # Solution rows get an "All " prefix to keep their
                        # keys distinct from individual-paper rows.
                        row['All ' + name if 'utions' in name else name] = tr.find("a").get("href")
                    except AttributeError:
                        # Rows missing a <td> or <a> carry no link; skip them.
                        continue
                if data[year] == {}:
                    # Drop years whose page produced no usable rows.
                    del data[year]

        # 2015-2017 use one URL scheme, 2018-2020 the other.
        tasks = [
            get_link(template.format(i) if i <= 17 else template_2.format(i), i)
            for i in range(15, 21)
        ]
        await asyncio.gather(*tasks)

    with open("data.json", "w") as file:
        json.dump(data, file, indent=4)

    # Flatten {year: {name: url}} into one crawljob entry per download.
    new_data = []
    with open('data.crawljob', 'w') as file:
        for year_links in data.values():
            for k, v in year_links.items():
                row = {
                    'text': v,
                    'filename': f'{k}.pdf',
                    # "autoConfirm": "TRUE",
                    # "autoStart": "TRUE",
                    "downloadFolder": "D:\\Documents\\Solutions\\pyq\\adv"
                }
                new_data.append(row)
        json.dump(new_data, file)
asyncio.get_event_loop().run_until_complete(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment