Skip to content

Instantly share code, notes, and snippets.

@yuwash
Last active November 26, 2017 16:40
Show Gist options
  • Save yuwash/7f3a7934bf75349bc1b99a34c571fdf4 to your computer and use it in GitHub Desktop.
Save yuwash/7f3a7934bf75349bc1b99a34c571fdf4 to your computer and use it in GitHub Desktop.
Read stdin and find conversations (quoted text; the first conversation on a line must begin at the start of the line; later ones may start anywhere on that line).
#! /usr/bin/env python3
from itertools import islice
from find_conversations import read
class Census:
    """Tally of how many times each item has been seen.

    Counts live in ``self.data``.  When a ``represent`` callable is given,
    every item is stored and looked up under ``represent(item)``; otherwise
    the item itself is the key.
    """

    def __init__(self, represent=None):
        """Create an empty census.

        represent -- optional one-argument callable mapping an item to the
            key it is tallied under (None means "use the item as is").
        """
        self.data = {}  # stored key -> number of occurrences
        self.represent = represent

    def _key(self, item):
        """Return the key of ``self.data`` under which *item* is tallied."""
        if self.represent is None:
            return item
        return self.represent(item)

    def __contains__(self, key):
        """Tell whether *key* (after ``represent``) has been tallied."""
        return self._key(key) in self.data

    def __getitem__(self, key):
        """Return the count of *key*; raises KeyError if it was never seen."""
        return self.data[self._key(key)]

    def __setitem__(self, key, value):
        """Set the count of *key* to *value*."""
        self.data[self._key(key)] = value

    def add(self, item, count=1):
        """Increase the tally of *item* by *count*, starting from zero."""
        key = self._key(item)
        self.data[key] = self.data.get(key, 0) + count

    def count(self, items):
        """Tally every element of the iterable *items* once."""
        for item in items:
            self.add(item)

    def __len__(self):
        """Return the total of all counts (not the number of distinct keys)."""
        return sum(self.data.values())

    def min(self):
        """Return the smallest stored key; raises ValueError when empty."""
        return min(self.data)

    def max(self):
        """Return the largest stored key; raises ValueError when empty."""
        return max(self.data)

    def iterByCount(self):
        """Yield the stored keys from most to least frequent.

        Keys with equal counts come out in the order they were first added.
        """
        # The keys of self.data are already in stored form, so their counts
        # are read from the dict directly.  Going through __getitem__ would
        # apply `represent` a second time and fail (or hit the wrong entry)
        # for any represent that is not idempotent, e.g. ``ord``.
        yield from sorted(self.data, key=self.data.get, reverse=True)
if __name__ == '__main__':
    # Tally every character of every line read from stdin, then report the
    # total, the code point range and the ten most frequent characters.
    charsCensus = Census()
    for line in read():
        charsCensus.count(line)
    if charsCensus.data:
        print('of {}, range: [{},{}]'.format(
            len(charsCensus), ord(charsCensus.min()), ord(charsCensus.max()),
        ))
    else:
        # Nothing was tallied (no input, or only empty lines): min()/max()
        # would raise ValueError on the empty census, so report an empty
        # range instead of crashing.
        print('of 0, range: []')
    print(
        'most frequent characters: '
        + repr(list(islice(charsCensus.iterByCount(), 0, 10))))
#! /usr/bin/env python3
def conversations(text):
    '''untested version of conversationsForLines that goes through a
    string containing newlines instead of an iterable over lines;
    kept as it might prove useful for other cases

    Yields each double-quoted span of text, quotes included.  The first
    conversation of a paragraph has to open at the very start of a line;
    further ones may open anywhere until the paragraph ends.  A
    conversation may run across single newlines, but a blank line while
    one is still open prints a warning and discards it.'''
    # XXX paragraph is not what you think it is!
    # "Paragraph" here only means: from a line-initial opening quote up to
    # the first newline that is reached outside of a conversation.
    inParagraph = False
    convStart = -1  # index of the opening quote, -1 when outside a conversation
    for pos, char in enumerate(text):
        atLineStart = pos == 0 or text[pos - 1] == '\n'
        if not inParagraph:
            if char == '"' and atLineStart:
                inParagraph = True
                convStart = pos
        elif char == '"':
            if convStart == -1:
                convStart = pos
            else:
                yield text[convStart:pos + 1]
                convStart = -1
        elif char == '\n':
            if convStart == -1:
                # newline outside of a conversation ends the paragraph
                inParagraph = False
            elif atLineStart:
                # two newlines in a row: blank line inside a conversation
                print('WARNING paragraph ended while in conversation!')
                inParagraph = False
                convStart = -1
        # Reaching the end of the text needs no handling: an unfinished
        # conversation is simply dropped, as conversationsForLines does.
def conversationsForLines(textLines):
    '''Yield each double-quoted conversation found in textLines.

    textLines is an iterable of lines; they are presumably expected
    without their trailing newline, which is how read() delivers them.
    Each yielded string includes its quotes.  A conversation that runs
    over several lines is yielded as one string joined with newlines.

    The first conversation of a line has to open at the very start of
    the line; further ones may open anywhere later on that line.  An
    empty line while a conversation is still open prints a warning and
    discards the unfinished conversation.'''
    # XXX paragraph is not what you think it is!
    convStart = -1  # index of the opening quote in the current line, -1 if none
    record = ''  # unfinished conversation carried over from earlier lines
    for line in textLines:
        remaining = 0  # where to start looking for quotes in this line
        if record:
            if line:
                record += '\n'
                convStart = 0  # start within this line
            else:
                print('WARNING paragraph ended while in conversation!')
                record = ''
                convStart = -1
        elif line.startswith('"'):
            convStart = 0
            remaining = 1  # skip the opening quote itself
        else:
            # conversation has to start at the line start or follow
            # another conversation within the line
            # otherwise ignore (robustness decision)
            continue
        # Walk over every quote of the line: each one either opens a new
        # conversation or closes the current one.  (Positions after a closed
        # conversation must not be skipped, or follow-up conversations on
        # the same line would never be found.)
        pos = line.find('"', remaining)
        while pos != -1:
            if convStart == -1:
                convStart = pos
            else:
                yield record + line[convStart:pos + 1]
                record = ''
                convStart = -1
            pos = line.find('"', pos + 1)
        if convStart != -1:
            # still open at the end of the line: carry it over
            record += line[convStart:]
def read(*args, **kwargs):
    '''Yield one input() result after another until EOFError is raised.

    All arguments are handed to input() unchanged on every call.'''
    try:
        while True:
            yield input(*args, **kwargs)
    except EOFError:
        # end of input: finish the generator quietly
        pass
if __name__ == '__main__':
    # Print every conversation found on stdin, one print() call each.
    stdinLines = read()
    for quoted in conversationsForLines(stdinLines):
        print(quoted)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment