Skip to content

Instantly share code, notes, and snippets.

@jl
Created December 12, 2014 01:22
Show Gist options
  • Save jl/6ced11e66a427582d7e8 to your computer and use it in GitHub Desktop.
Save jl/6ced11e66a427582d7e8 to your computer and use it in GitHub Desktop.
python subprocess stdout iteration
"""diff output of multiple processes and exit as soon as one line is different
Note: we have to take care to not trigger any of Python's internal buffering mechanisms.
"""
import itertools
import subprocess
import sys
# bufsize=1 asks subprocess for line-buffered pipes.
LINE_BUFFERED = 1
p1 = subprocess.Popen(['python', 'writeslow.py'], stdout=subprocess.PIPE, bufsize=LINE_BUFFERED)
p2 = subprocess.Popen(['python', 'writeslow.py'], stdout=subprocess.PIPE, bufsize=LINE_BUFFERED)
try:
    # If you iterate over stdout directly, it will internally try to buffer
    # too much (in experiments, seems like it buffers the whole output!).
    # So we adapt readline() into an iterator; '' is what readline() returns
    # at end of stream, so it serves as the stop sentinel.
    it1 = iter(p1.stdout.readline, '')
    it2 = iter(p2.stdout.readline, '')
    # Python 2's built-in zip() returns a list, which would read every line
    # into memory; the itertools variants pull one pair at a time.
    # izip_longest (rather than izip) pads the shorter stream with None, so a
    # process that stops writing early is reported as a difference too.
    z = itertools.izip_longest(it1, it2)
    # Find the first pair of lines that differ, and read no more than that.
    # The default keeps next() from raising StopIteration when both outputs
    # are identical.
    line_num, lines = next(
        ((i + 1, pair) for i, pair in enumerate(z) if pair[0] != pair[1]),
        (None, ()))
    # NOTE: print(...) with a single argument behaves exactly like the
    # Python 2 print statement.
    if line_num is None:
        print('no differences')
    else:
        print('different line {}'.format(line_num))
        for line in lines:
            print('content: {!r}'.format(line))
finally:
    # Stop both writers explicitly instead of leaving them to die on a broken
    # pipe, then reap them so no zombie processes are left behind.
    for proc in (p1, p2):
        proc.kill()
        proc.wait()
"""write a bunch of lines out real slow"""
import random
import sys
import time
# Emit one line per iteration, flushing each time so that a reader on the
# other end of a pipe sees every line as soon as it is written.
for i in xrange(50000):
    if i == 3:
        # i counts from 0, so this is the fourth line written. It carries a
        # random value so that two runs of this script differ here.
        print(random.random())
    else:
        print(i)
    sys.stdout.flush()
    # Make output trickle out slowly.
    time.sleep(0.1)
# Should never get here when driven by the diff script, since the reader
# stops as soon as it sees the differing line.
sys.stderr.write('writer done')
@jl
Copy link
Author

jl commented Dec 12, 2014

Of course, you can accomplish mostly the same thing in a few lines of bash, but here it is.

@zacharysyoung
Copy link

zacharysyoung commented May 11, 2021

Hey, I came across this while reading up on iterators and IO streams. I decided to convert to Python3 to exercise those two subject-matter areas. Not much changed, mostly print() and some updates to match changes in the subprocess API.

If you or anyone else is interested:

"""write a bunch of lines out real slow"""
import random
import sys
import time


for counter in range(50000):
    # Every line is the loop counter, except index 3 (the fourth line
    # written), which is a random float so that two runs diverge there.
    payload = random.random() if counter == 3 else counter
    # flush=True pushes each line out immediately instead of letting it sit
    # in the stdout buffer.
    print(payload, flush=True)

    # Pause between lines so the output trickles out slowly.
    time.sleep(0.5)

# Only reached if nothing stops this process before the loop finishes.
sys.stderr.write('writer done')
"""diff output of multiple processes and exit as soon as one line is different
Note: we have to take care to not trigger any of Python's internal buffering mechanisms.
"""

import itertools
import subprocess

# bufsize=1 means line-buffered; subprocess only honours it for text-mode
# pipes, which is why text=True is passed below.
LINE_BUFFERED = 1
p1 = subprocess.Popen(
    ['python3', 'writeslow.py'],
    stdout=subprocess.PIPE, bufsize=LINE_BUFFERED, text=True,
)
p2 = subprocess.Popen(
    ['python3', 'writeslow.py'],
    stdout=subprocess.PIPE, bufsize=LINE_BUFFERED, text=True,
)

try:
    # Adapt readline() into an iterator so lines are requested from each pipe
    # one at a time. In text mode readline() returns str, so the
    # end-of-stream sentinel must be '' -- a bytes sentinel (b'') never
    # compares equal and the iterator would never terminate.
    it1 = iter(p1.stdout.readline, '')
    it2 = iter(p2.stdout.readline, '')

    # Iterate both streams line-for-line. zip_longest pads the shorter stream
    # with None, so a process that ends early is reported as a difference
    # instead of being silently ignored.
    z = itertools.zip_longest(it1, it2)

    # Find the first pair of lines that differ, and read no more than that.
    # The default keeps next() from raising StopIteration when both outputs
    # are identical.
    line_num, lines = next(
        ((i, pair) for i, pair in enumerate(z, start=1) if pair[0] != pair[1]),
        (None, ()),
    )
    if line_num is None:
        print('no differences')
    else:
        print(f'different line {line_num}')
        for line in lines:
            print(f'content: {line!r}')
finally:
    # Kill the writers so they do not hit BrokenPipeError / IOError
    # [Errno 32], then close our end of each pipe and reap the children so no
    # zombie processes or open file handles are left behind.
    for proc in (p1, p2):
        proc.kill()
        proc.stdout.close()
        proc.wait()

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment