Skip to content

Instantly share code, notes, and snippets.

@rainbowbird
Last active February 22, 2020 12:47
Show Gist options
  • Save rainbowbird/e6332648813481b57647ba2b856dcc23 to your computer and use it in GitHub Desktop.
Save rainbowbird/e6332648813481b57647ba2b856dcc23 to your computer and use it in GitHub Desktop.
Pipelining - Chaining Commands
"""Use generator to sum log file data on the fly
- Generators make good pipelines.
- They are useful for workflow problems.
- Example: parsing a log file.

Log file content:
12
34
56
78
90
# comment
01
12
34
"""
import sys
import time
def read_forever(fobj, poll_interval=0.1):
    """Yield lines from a file object, waiting for more at end of data.

    The generator never finishes on its own. Whenever ``fobj.readline()``
    returns an empty string (nothing more to read right now), it sleeps for
    ``poll_interval`` seconds and tries again, so lines appended later by
    another process are picked up.

    Args:
        fobj: Object with a ``readline()`` method that returns an empty
            string when no data is currently available, e.g. an open text
            file.
        poll_interval: Seconds to sleep between attempts when no new line
            is available. Defaults to 0.1, the value that used to be
            hard-coded.

    Yields:
        Each non-empty value returned by ``fobj.readline()``, unchanged.

    Note:
        NOTE(review): if the writer flushes a line in several pieces,
        ``readline()`` may hand back a fragment without its trailing
        newline. Confirm how the writing process flushes before relying on
        whole lines.
    """
    while True:
        line = fobj.readline()
        if not line:
            # Nothing new yet: back off briefly instead of busy-waiting.
            time.sleep(poll_interval)
            continue
        yield line
def filter_comments(lines):
    """Pass through every line that is not a comment.

    A line is treated as a comment when, after surrounding whitespace is
    stripped, it begins with ``#``. Non-comment lines are yielded unchanged
    (including their original whitespace and line ending).
    """
    for raw_line in lines:
        stripped = raw_line.strip()
        if stripped.startswith('#'):
            # Comment line: drop it from the stream.
            continue
        yield raw_line
def get_number(lines):
    """Yield the last whitespace-separated field of each line as an integer.

    Lines that contain no fields at all (empty or whitespace-only) are
    skipped; indexing the last field of such a line would otherwise raise
    ``IndexError`` and stop the whole pipeline.

    Args:
        lines: Iterable of strings.

    Yields:
        ``int`` parsed from the last field of every non-blank line.

    Raises:
        ValueError: If the last field of a line is not a valid integer.
    """
    for line in lines:
        fields = line.split()
        if not fields:
            # Blank line: there is no last field to convert.
            continue
        yield int(fields[-1])
def show_sum(file_name='out.txt'):
    """Chain the generators over a file and print a running sum.

    Builds the pipeline ``read_forever`` -> ``filter_comments`` ->
    ``get_number`` on the opened file and adds up every number it produces.
    After each number the current sum is rewritten in place on stdout.
    When the user interrupts with Ctrl-C, the final sum is printed on its
    own line and the function returns.

    Args:
        file_name: Path of the file to read. Defaults to ``'out.txt'``.
    """
    total = 0
    # Open via a context manager so the file handle is closed on every exit
    # path (Ctrl-C as well as an error raised while parsing a line).
    with open(file_name) as fobj:
        numbers = get_number(filter_comments(read_forever(fobj)))
        try:
            for number in numbers:
                total += number
                # '\r' moves the cursor back to the start of the line so
                # the next update overwrites this one.
                sys.stdout.write('sum: %d\r' % total)
                sys.stdout.flush()
        except KeyboardInterrupt:
            print('sum:', total)
if __name__ == '__main__':
    # Use the first command-line argument as the file name when one is
    # given. With no argument the slice is empty, so show_sum() falls back
    # to its own default instead of failing with IndexError on sys.argv[1].
    show_sum(*sys.argv[1:2])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment