Created
June 18, 2020 12:54
-
-
Save xl7dev/d1db984e7ef2170cd0ec2da5a19fe2b6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# encoding: utf-8 | |
""" | |
@author: xl7dev | |
""" | |
from aiohttp import ClientSession | |
import asyncio | |
import optparse | |
from pymongo import MongoClient | |
from lxml import etree | |
# Module-wide MongoDB handle: connects to the local server on the default
# port and selects the 'test' database that fetch() writes its results to.
client = MongoClient(host='127.0.0.1', port=27017)
db = client['test']
""" | |
> python3 async_requests.py -f urls.txt
""" | |
async def fetch(sem, url):
    """Fetch *url* and store its status, headers and page title in MongoDB.

    Args:
        sem: Async context manager (an ``asyncio.Semaphore`` in ``run``) that
            bounds how many fetches are in flight at once.
        url: Absolute URL to request.

    Returns:
        None. On success one document with the keys ``url``, ``status``,
        ``headers`` and ``title`` is inserted into ``db.httpurl``.

    Any exception (network error, timeout, decoding or database failure) is
    printed together with the URL and swallowed, so a single bad URL does not
    abort the rest of the batch.
    """
    try:
        async with sem:
            async with ClientSession() as session:
                async with session.get(url, timeout=20) as response:
                    status = response.status
                    # Copy into a plain dict so the stored document does not
                    # depend on aiohttp's multidict proxy type.
                    headers = dict(response.headers)
                    resp = await response.text()
                    html = etree.HTML(resp)
                    # Pages without a <title> (or with no parseable markup)
                    # are still recorded, with an empty title, instead of
                    # failing on an empty XPath result.
                    titles = html.xpath('//title/text()') if html is not None else []
                    title = titles[0].strip() if titles else ""
                    item = {"url": url, "status": status, "headers": headers, "title": title}
                    # NOTE: pymongo's insert_one is a synchronous call.
                    db.httpurl.insert_one(item)
    except Exception as e:
        print(url, e)
async def run(urls, concurrency=500):
    """Fetch every URL in *urls* concurrently and wait for all to finish.

    Args:
        urls: Iterable of URL strings; one ``fetch`` task is created per item.
        concurrency: Maximum number of fetches allowed to hold the shared
            semaphore at the same time. Defaults to 500, the limit that was
            previously hard-coded.

    Raises:
        ValueError: If *concurrency* is less than 1; a zero-sized semaphore
            would make every fetch wait forever.
    """
    if concurrency < 1:
        raise ValueError("concurrency must be at least 1")
    # One semaphore shared by all tasks caps the number of in-flight requests.
    sem = asyncio.Semaphore(concurrency)
    tasks = [asyncio.create_task(fetch(sem, url)) for url in urls]
    await asyncio.gather(*tasks)
if __name__ == "__main__":
    # Command-line entry point: read target hosts/URLs from the file given
    # with -f/--filename (one per line) and fetch them all concurrently.
    parser = optparse.OptionParser()
    parser.add_option("-f", "--filename", dest="filename", help="Target filename")
    options, _ = parser.parse_args()
    if options.filename:
        # The context manager closes the file as soon as it has been read.
        with open(options.filename) as url_file:
            targets = [line.strip() for line in url_file]
        # Entries without an explicit scheme default to http://; blank lines
        # are skipped so they do not turn into the bare URL "http://".
        urls = [
            target if target.startswith(('http://', 'https://')) else "http://" + target
            for target in targets
            if target
        ]
        # asyncio.run creates the event loop, runs the coroutine and closes
        # the loop afterwards.
        asyncio.run(run(urls))
    else:
        # Without a filename there is nothing to fetch; show the usage text.
        parser.print_help()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment