Last active
May 3, 2023 07:06
-
-
Save rishi-raj-jain/ad01c4635b49427a05428136e3053b64 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import asyncio
import os
import re
import shutil
import time

import aiohttp
import requests
# Top-level directories of the current working directory; these are the roots
# that get scanned for remote image URLs. '.git' is filtered out rather than
# removed with list.remove(), so running outside a git checkout does not
# raise ValueError.
root = [entry for entry in next(os.walk('.'), ('.', [], []))[1] if entry != '.git']

# Matches http(s) URLs ending in a jpg/jpeg/gif/png extension. The trailing
# negative lookahead rejects a candidate when a ')' follows it before the
# next whitespace character.
pattern = r'https?://(?:[^\s()<>{}\[\]]+\.(?:jpg|jpeg|gif|png))(?![^\s]*\))'
async def get(url, session):
    """Download one image and store it under ``assets-www/img/cloudinary/``.

    Args:
        url: Absolute URL of the image. The text after its last ``/`` is used
            as the local file name.
        session: Object whose ``get(url=...)`` is an async context manager
            yielding a response with ``raise_for_status()`` and an awaitable
            ``read()`` (the script passes an ``aiohttp.ClientSession``).

    Returns:
        None. Failures are printed instead of raised, so one bad URL does not
        abort the other downloads.
    """
    target_dir = os.path.join('assets-www', 'img', 'cloudinary')
    try:
        async with session.get(url=url) as response:
            # Reject 4xx/5xx replies so an error page is never saved as an image.
            response.raise_for_status()
            resp = await response.read()
        print("Successfully got url {} with resp of length {}.".format(url, len(resp)))
        # The output directory may not exist yet; create it on demand.
        os.makedirs(target_dir, exist_ok=True)
        file_name = os.path.join(target_dir, url.rsplit('/', 1)[-1])
        with open(file_name, 'wb') as f:
            f.write(resp)
    except Exception as e:
        # Best-effort boundary for a single download: report and carry on.
        print(e)
        print("Unable to get url {} due to {}.".format(url, e.__class__))
async def main(urls):
    """Download every URL in *urls* concurrently over one shared HTTP session.

    Args:
        urls: Iterable of image URLs. Repeated entries are fetched only once.

    Returns:
        None. A one-line summary is printed when all downloads have finished.
    """
    # The same URL can be collected more than once; dict.fromkeys drops the
    # repeats while keeping first-seen order.
    unique_urls = list(dict.fromkeys(urls))
    async with aiohttp.ClientSession() as session:
        ret = await asyncio.gather(*(get(url, session) for url in unique_urls))
    print("Finalized all. Return is a list of len {} outputs.".format(len(ret)))
# Every remote image URL found while scanning the files; later passed to main().
urls = []


def replaceFn(match):
    """Return the local path that replaces a matched remote image URL.

    Args:
        match: ``re.Match`` whose whole match (group 0) is the image URL.

    Returns:
        ``/assets-www/img/cloudinary/`` followed by the URL's final
        ``/``-separated segment.
    """
    return '/assets-www/img/cloudinary/' + match.group(0).split('/')[-1]
# Scan every file beneath the selected roots: record each remote image URL
# and rewrite the file so it references the local copy instead.
for eachRoot in root:
    for path, _subdirs, files in os.walk(eachRoot):
        for name in files:
            file_path = os.path.join(path, name)
            try:
                # newline='' disables newline translation, so the rewrite
                # below keeps the file's original line endings.
                with open(file_path, 'r', newline='') as source:
                    tmp = source.read()
            except (UnicodeDecodeError, OSError):
                # Files that cannot be read as text (binary assets,
                # unreadable entries) are skipped on purpose.
                continue
            cloudinaryImages = re.findall(pattern, tmp)
            if not cloudinaryImages:
                continue
            urls.extend(cloudinaryImages)
            try:
                with open(file_path, 'w', newline='') as tmpFile:
                    tmpFile.write(re.sub(pattern, replaceFn, tmp))
            except OSError as e:
                # Keep going, but make the failed rewrite visible.
                print("Unable to rewrite {} due to {}.".format(file_path, e.__class__))
# Download everything that was collected and report the elapsed time.
# perf_counter() is monotonic, so the difference cannot be skewed by system
# clock adjustments the way time.time() can.
start = time.perf_counter()
asyncio.run(main(urls))
end = time.perf_counter()
print("Took {} seconds to pull {} websites.".format(end - start, len(urls)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment