Created
October 21, 2015 05:36
-
-
Save mgingras/fe6c2327a5542802663e to your computer and use it in GitHub Desktop.
Create gevent ssl socket for high throughput requests on api endpoints
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
import re

import gevent
from gevent import socket, ssl
def verify_ssl(url):
    """Ensure *url* uses the https scheme.

    Returns None when the URL starts with ``https://``.

    Raises:
        Exception: if the URL does not begin with ``https://``.
    """
    # Check the prefix only: a substring search would accept URLs such as
    # 'http://host/?next=https://other', which merely contain 'https://'.
    if not url.startswith('https://'):
        raise Exception('Expects https url for ssl connection')
def split_url_into_host_and_path(url):
    """Split *url* into ``(host, path)``.

    A leading ``https://`` or ``http://`` scheme is removed. Everything up
    to the first ``/`` is the host; the remainder (including any query
    string) is returned as the path with a leading ``/``. A URL with no
    path yields ``'/'``.
    """
    # Strip the scheme only when it is the prefix, so that a URL embedded
    # in the path or query string (e.g. '?next=http://other/') is kept intact.
    for scheme in ('https://', 'http://'):
        if url.startswith(scheme):
            url = url[len(scheme):]
            break
    host, _, remainder = url.partition('/')
    return host, '/' + remainder
def parse_out_http_headers_and_body(http_resp_string):
    """Separate a raw HTTP response into ``(headers, body)``.

    The split happens at the first blank line (``\\r\\n\\r\\n``). When no
    such separator is present, ``(None, http_resp_string)`` is returned.
    """
    head, blank_line, payload = http_resp_string.partition('\r\n\r\n')
    if not blank_line:
        # No header/body boundary found: hand the input back untouched.
        return None, http_resp_string
    return head, payload
def gevent_ssl_socket_get(url):
    """Perform an HTTPS GET over a gevent SSL socket and return parsed JSON.

    The response is read until the number of bytes announced by the
    ``Content-Length`` header has been received; responses without that
    header (e.g. chunked transfer encoding) are not supported.

    Args:
        url: an ``https://`` URL. The connection is always made to port 443.

    Returns:
        The response body decoded with ``json.loads``.

    Raises:
        Exception: if the URL is not https, the response has no headers or
            no Content-Length, the peer closes the connection before the
            full body arrives, or the body is not valid JSON.
        socket.timeout / socket.error: propagated from the socket layer.
    """
    verify_ssl(url)  # Make sure that this is a https url
    host, path = split_url_into_host_and_path(url)

    SOCK_READ_SIZE = 2048
    request = 'GET ' + path + ' HTTP/1.1\r\nHost: ' + host + '\r\n\r\n'

    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    ssl_socket = None
    try:
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        # NOTE(security): CERT_NONE disables certificate verification, so the
        # peer's identity is not checked. Kept as-is to preserve behavior;
        # review before using against endpoints where this matters.
        ssl_socket = ssl.wrap_socket(
            sock,
            cert_reqs=ssl.CERT_NONE,
        )
        # Set the timeout before connecting so the connect/handshake phase
        # is bounded as well, not only the subsequent reads and writes.
        ssl_socket.settimeout(45.0)
        ssl_socket.connect((host, 443))
        ssl_socket.write(request)

        # The header block may span several reads; keep reading until the
        # blank line separating headers from body shows up (or the peer
        # stops sending).
        response = ssl_socket.read(SOCK_READ_SIZE)
        headers, body = parse_out_http_headers_and_body(response)
        while headers is None:
            chunk = ssl_socket.read(SOCK_READ_SIZE)
            if not chunk:
                break
            response += chunk
            headers, body = parse_out_http_headers_and_body(response)
        if not headers:
            raise Exception('No headers in http response: ' + str(response))

        # Header names are case-insensitive; anchor at line start so that
        # e.g. 'X-Content-Length' is not mistaken for the real header.
        length_match = re.search(
            r'^Content-Length:\s*([0-9]+)',
            headers,
            re.IGNORECASE | re.MULTILINE,
        )
        if length_match is None:
            raise Exception('No Content-Length header in http response')
        content_length = int(length_match.group(1))

        body_parts = [body]
        received = len(body)
        while received < content_length:
            chunk = ssl_socket.read(SOCK_READ_SIZE)
            if not chunk:
                # An empty read means the peer closed the connection; without
                # this check the loop would never terminate.
                raise Exception('Connection closed before full body was received')
            body_parts.append(chunk)
            received += len(chunk)
        body = ''.join(body_parts)
    finally:
        # Always release the socket, including on error paths.
        # shutdown closes socket @ OS level instead of just decrementing
        # socket counter and leaving for gc
        connection = ssl_socket if ssl_socket is not None else sock
        try:
            connection.shutdown(socket.SHUT_RDWR)
        except socket.error:
            # The connection may never have been established or may already
            # be closed by the peer; cleanup must not mask the real error.
            pass
        connection.close()

    try:
        return json.loads(body)
    except ValueError:
        raise Exception('Failed to parse out json from body')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Using this on an EC2 C3 large instance with 4 Celery workers at concurrency 100; each task triggers 16 greenlets, each requesting one URL.