Skip to content

Instantly share code, notes, and snippets.

@xl7dev
Created June 18, 2020 12:54
Show Gist options
  • Save xl7dev/d1db984e7ef2170cd0ec2da5a19fe2b6 to your computer and use it in GitHub Desktop.
Save xl7dev/d1db984e7ef2170cd0ec2da5a19fe2b6 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# encoding: utf-8
"""
@author: xl7dev
"""
from aiohttp import ClientSession
import asyncio
import optparse
from pymongo import MongoClient
from lxml import etree
# Module-wide MongoDB handles: a client for the server at 127.0.0.1:27017
# and its "test" database, which fetch() writes result documents into.
client = MongoClient(host='127.0.0.1', port=27017)
db = client['test']
"""
> python3 async_requests.py -f urls.txt
"""
async def fetch(sem, url):
    """Fetch one URL and record its status, headers and title in MongoDB.

    Args:
        sem: Semaphore entered with ``async with`` for the whole fetch, so
            the caller can cap how many requests run at the same time.
        url: URL to request with HTTP GET (20 second timeout).

    Returns:
        None. On success one document with the keys ``url``, ``status``,
        ``headers`` and ``title`` is inserted into ``db.httpurl``. Any
        exception is caught and printed next to the URL, so a single bad
        target never aborts the surrounding batch.
    """
    try:
        async with sem:
            async with ClientSession() as session:
                async with session.get(url, timeout=20) as response:
                    status = response.status
                    # Store a plain dict rather than aiohttp's multidict
                    # proxy; repeated header names collapse to one entry.
                    headers = dict(response.headers)
                    resp = await response.text()
                    html = etree.HTML(resp)
                    # Pages without a <title> (or with a body the parser
                    # yields no tree for) are still recorded, with an empty
                    # title, instead of being dropped by an IndexError.
                    titles = html.xpath('//title/text()') if html is not None else []
                    title = titles[0].strip() if titles else ""
                    item = {"url": url, "status": status, "headers": headers, "title": title}
                    # pymongo's insert_one is synchronous; run it in the
                    # default executor so it does not stall the event loop
                    # (and with it every other in-flight fetch).
                    loop = asyncio.get_running_loop()
                    await loop.run_in_executor(None, db.httpurl.insert_one, item)
    except Exception as e:
        # repr() keeps the exception type visible; some errors (timeouts in
        # particular) have an empty message and would otherwise print blank.
        print(url, repr(e))
async def run(urls, limit=500):
    """Fetch every URL concurrently, with at most ``limit`` fetches at once.

    Args:
        urls: Iterable of URL strings; each one is passed to ``fetch``.
        limit: Size of the semaphore shared by all fetch tasks. Defaults to
            500, the value that used to be hard-coded.

    Returns:
        None, once every fetch task has finished.

    Raises:
        ValueError: If ``limit`` is smaller than 1.
    """
    if limit < 1:
        # A zero-sized semaphore would leave every task waiting forever.
        raise ValueError("limit must be at least 1")
    # Create the semaphore inside the coroutine so it belongs to the
    # event loop that is actually running the tasks.
    sem = asyncio.Semaphore(limit)
    tasks = [asyncio.create_task(fetch(sem, url)) for url in urls]
    await asyncio.gather(*tasks)
def normalize_urls(lines):
    """Turn raw text lines into a list of absolute URLs.

    Each line is stripped of surrounding whitespace. Blank lines are
    skipped, and any entry that does not already start with ``http://`` or
    ``https://`` gets ``http://`` prepended.

    Args:
        lines: Iterable of strings, for example an open text file.

    Returns:
        list[str]: The cleaned URLs, in input order.
    """
    urls = []
    for line in lines:
        target = line.strip()
        if not target:
            # A blank line would otherwise become the bogus URL "http://".
            continue
        if not target.startswith(('http://', 'https://')):
            target = "http://" + target
        urls.append(target)
    return urls


if __name__ == "__main__":
    parser = optparse.OptionParser(usage="%prog [options] [FILE]")
    parser.add_option("-f", "--filename", dest="filename", help="Target filename")
    options, args = parser.parse_args()
    # Accept the target file via -f/--filename or as a bare positional
    # argument, so both invocation styles work.
    filename = options.filename or (args[0] if args else None)
    if not filename:
        # Exit with a usage message instead of silently doing nothing.
        parser.error("no target file given (use -f FILE)")
    # The context manager closes the file once all lines have been read.
    with open(filename) as handle:
        urls = normalize_urls(handle)
    # asyncio.run creates the event loop, runs the coroutine and closes the
    # loop again, replacing the manual get_event_loop/close sequence.
    asyncio.run(run(urls))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment