Skip to content

Instantly share code, notes, and snippets.

@mdeous
Last active December 17, 2015 23:59
Show Gist options
  • Save mdeous/5693631 to your computer and use it in GitHub Desktop.
Save mdeous/5693631 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
from threading import Thread
from Queue import Empty, Queue
from urlparse import urlparse
# Shutdown flags polled by the worker threads' loops.
# ``finished_processing`` is flipped to True by ``Parsing.run`` when it exits;
# ``incoming_data`` is flipped to False by the main script once stdin is read.
finished_processing = False
incoming_data = True

# Pipeline queues: ``todo`` carries raw input lines (URLs) to the parser,
# ``done`` carries the extracted network locations to the writer.
todo, done = Queue(), Queue()
class Parsing(Thread):
    """Worker thread that extracts the network location from queued URLs.

    URLs are taken from the queue passed to the constructor. For each one,
    the ``netloc`` component returned by ``urlparse`` is put on the
    module-level ``done`` queue.
    """

    # Seconds to block waiting for a URL before re-checking the exit condition.
    poll_interval = 0.1

    def __init__(self, queue):
        """Create the worker.

        :param queue: queue of URL strings to consume.
        """
        super(Parsing, self).__init__()
        self.queue = queue

    def run(self):
        """Consume URLs until input has ended and the queue is drained.

        The loop keeps going while the module-level ``incoming_data`` flag is
        true or the input queue still holds items. The module-level
        ``finished_processing`` flag is set to True on the way out, whether
        the loop ended normally or through an exception.
        """
        global finished_processing
        try:
            while incoming_data or not self.queue.empty():
                try:
                    # Block for a short while instead of spinning on
                    # get_nowait(), which would peg a CPU core while idle.
                    url = self.queue.get(timeout=self.poll_interval)
                except Empty:
                    continue
                try:
                    netloc = urlparse(url).netloc
                except ValueError as err:
                    # urlparse rejects some malformed inputs (e.g. an
                    # unbalanced IPv6 bracket); report and skip the line
                    # rather than letting one bad URL kill the thread.
                    sys.stderr.write('skipping %r: %s\n' % (url, err))
                    continue
                done.put(netloc)
        finally:
            # Must always be set, otherwise the consumer of ``done`` that
            # polls this flag would wait forever.
            finished_processing = True
class Storage(Thread):
    """Worker thread that writes queued strings to a file, one per line."""

    # Seconds to block waiting for an item before re-checking the exit
    # condition.
    poll_interval = 0.1

    def __init__(self, queue, outfile):
        """Create the worker.

        :param queue: queue of strings to write out.
        :param outfile: path of the file to (over)write.
        """
        super(Storage, self).__init__()
        self.queue = queue
        self.outfile = outfile

    def run(self):
        """Write items until processing has finished and the queue is drained.

        The output file is opened in ``'w'`` mode. The loop keeps going while
        the module-level ``finished_processing`` flag is false or the queue
        still holds items; each item is written followed by a newline.
        """
        with open(self.outfile, 'w') as out:
            while not finished_processing or not self.queue.empty():
                try:
                    # Block for a short while instead of spinning on
                    # get_nowait(), which would peg a CPU core while idle.
                    item = self.queue.get(timeout=self.poll_interval)
                except Empty:
                    continue
                out.write(item + '\n')
# Wire up the two-stage pipeline: the parser consumes ``todo`` and feeds
# ``done``; the storage thread drains ``done`` into 'output.txt'.
parser = Parsing(todo)
store = Storage(done, 'output.txt')
parser.start()
store.start()
try:
    # Feed every non-blank stdin line (stripped of surrounding whitespace)
    # to the parser.
    for line in sys.stdin:
        line = line.strip()
        if line:
            todo.put(line)
finally:
    # Always signal end-of-input, even if reading stdin was interrupted or
    # failed; otherwise the worker loops never see their exit condition and
    # the process hangs.
    incoming_data = False
    parser.join()
    store.join()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment