top-1m.csv can be downloaded from Alexa's website:
Created
October 5, 2015 09:06
-
-
Save jixunmoe/e67ee6924c13de4d2d52 to your computer and use it in GitHub Desktop.
Check https on sites
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import socket, ssl, threading, fnmatch | |
# Results file: each checked host is appended as one "<host>: OK|FAIL" line.
f_out = open('site_log.txt', mode='w')

# Site list (Alexa top 1m); the host is read from the second CSV column.
f_in = open('top-1m.csv')

# One lock per shared resource: the input file, the output file and stdout.
reader, writer, printer = threading.Lock(), threading.Lock(), threading.Lock()
class SitesChecker(threading.Thread):
    """Worker thread that checks whether hosts serve a valid HTTPS certificate.

    Each worker repeatedly takes the next host from the shared input file,
    attempts a verified TLS handshake on port 443 and records the outcome.

    The class relies on module-level state: the open files ``f_in`` and
    ``f_out`` and the locks ``reader``, ``writer`` and ``printer`` that
    serialize access to the input file, the output file and stdout.
    """

    # Seconds allowed for the TCP connect and for the TLS handshake.
    TIMEOUT = 1.0
    # CA bundle used as the only set of trust anchors.
    CA_FILE = 'cacerts.txt'

    def __init__(self, threadNum):
        """Create a worker; ``threadNum`` is only used to tag log output."""
        threading.Thread.__init__(self)
        self.threadNum = threadNum
        # Built lazily on first use so a missing CA file is reported per
        # host (as before) instead of aborting thread construction.
        self._tls_context = None

    def log(self, msg):
        """Print ``msg`` prefixed with this worker's number (serialized)."""
        with printer:
            print('[Thread %d] %s' % (self.threadNum, msg))

    def write(self, host, passed):
        """Append ``"<host>: OK"`` or ``"<host>: FAIL"`` to the output file."""
        with writer:
            f_out.write('%s: %s\n' % (host, 'OK' if passed else 'FAIL'))

    def run(self):
        """Check hosts until the input file is exhausted."""
        while True:
            host = self.next()
            if not host:
                self.log('Complete.')
                break
            self.check(host)

    @staticmethod
    def _host_from_line(line):
        """Return the second CSV column of ``line``, or None if it has none.

        Surrounding whitespace (including ``\\r\\n``) is stripped. A blank
        line, a line without a comma, or an empty host column yields None.
        """
        fields = line.strip().split(',')
        if len(fields) < 2:
            return None
        return fields[1].strip() or None

    def next(self):
        """Return the next host from the input file, or False at end of file.

        Blank or malformed lines are skipped rather than being mistaken for
        end of file or raising ``IndexError`` and killing the worker.
        """
        with reader:
            while True:
                line = f_in.readline()
                if line == '':
                    # readline() returns '' only at EOF; a blank line is '\n'.
                    return False
                host = self._host_from_line(line)
                if host:
                    return host

    def check(self, host):
        """Handshake with ``host`` on port 443 and record OK or FAIL.

        The result is OK only if the certificate chains to ``CA_FILE`` and is
        valid for ``host``; both checks are performed by the ``ssl`` module
        during the handshake. Any connection or verification error is logged
        and recorded as FAIL.
        """
        self.log('Check cert for %s..' % host)
        passed = False
        try:
            if self._tls_context is None:
                # Verifies the chain (CERT_REQUIRED) and the hostname.
                self._tls_context = ssl.create_default_context(
                    cafile=self.CA_FILE)
            # create_connection() tries every resolved address (IPv4/IPv6)
            # and the ``with`` blocks guarantee the socket is closed.
            with socket.create_connection((host, 443),
                                          timeout=self.TIMEOUT) as raw:
                # server_hostname sends SNI so virtual-hosted servers return
                # the certificate for this host, and enables hostname checks.
                with self._tls_context.wrap_socket(raw, server_hostname=host):
                    passed = True
        except (OSError, ValueError) as err:
            # OSError covers DNS, connect, timeout and TLS errors (and an
            # unreadable CA file); ValueError covers invalid host names.
            self.log('Failed to connect, next .. (%s)' % err)
        self.write(host, passed)
if __name__ == "__main__":
    # Start the workers, wait for all of them, then release both files.
    lstThreads = []
    try:
        # range(1, 20) yields 19 workers, numbered 1 through 19.
        for i in range(1, 20):
            sc = SitesChecker(i)
            sc.start()
            lstThreads.append(sc)
        for t in lstThreads:
            t.join()
    finally:
        # Close the files even if starting or joining a thread raised, so
        # buffered results are flushed and the input handle is not leaked.
        f_out.close()
        f_in.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment