yuwash · November 26, 2017 16:40
diff --git a/classifier.py b/classifier.py
 #! /usr/bin/env python3
 from itertools import islice
 from find_conversations import read


 class Census:
    '''Frequency table counting how often each item was added.

    The counts live in the plain dict ``data``.  When a ``represent``
    callable is supplied, every item handed to ``add`` or to the
    mapping-style methods is first translated by it, so items that share
    a representative share one counter.
    '''

    def __init__(self, represent=None):
        '''Create an empty census.

        represent -- optional callable mapping an item to the key it is
        counted under; None counts each item under itself.
        '''
        self.data = {}
        self.represent = represent

    def _key(self, item):
        '''Return the key of ``data`` under which item is counted.'''
        if self.represent is None:
            return item
        return self.represent(item)

    def __contains__(self, key):
        '''Tell whether key (after translation) has a counter.'''
        return self._key(key) in self.data

    def __getitem__(self, key):
        '''Return the count for key; raises KeyError if never counted.'''
        return self.data[self._key(key)]

    def __setitem__(self, key, value):
        '''Overwrite the count stored for key.'''
        self.data[self._key(key)] = value

    def add(self, item, count=1):
        '''Increase the counter of item by count, creating it if needed.'''
        key = self._key(item)  # translate once instead of per lookup
        self.data[key] = self.data.get(key, 0) + count

    def count(self, items):
        '''Add every element of the iterable items once.'''
        for item in items:
            self.add(item)

    def __len__(self):
        '''Return the sum of all counts (not the number of keys).'''
        return sum(self.data.values())

    def min(self):
        '''Return the smallest stored key; ValueError when empty.'''
        return min(self.data)

    def max(self):
        '''Return the largest stored key; ValueError when empty.'''
        return max(self.data)

    def iterByCount(self):
        '''Yield the stored keys from highest to lowest count.

        Counts are looked up in ``data`` directly: its keys have already
        been through ``represent``, and translating them a second time
        fails or hits the wrong counter unless ``represent`` happens to
        be idempotent.  Keys with equal counts keep the iteration order
        of ``data`` (the sort is stable).  The ranking is computed when
        the first key is requested.
        '''
        yield from sorted(
            self.data, key=self.data.__getitem__, reverse=True)


 if __name__ == '__main__':
    # Count every character of every line read from standard input.
    charsCensus = Census()
    for line in read():
        charsCensus.count(line)
    # Summary line: total characters counted plus the code point range.
    summary = 'of {}, range: [{},{}]'.format(
        len(charsCensus), ord(charsCensus.min()), ord(charsCensus.max()),
    )
    print(summary)
    # Only the first ten keys of the by-count ranking are consumed.
    mostFrequent = list(islice(charsCensus.iterByCount(), 0, 10))
    print('most frequent characters: ' + repr(mostFrequent))
diff --git a/find_conversations.py b/find_conversations.py
 #! /usr/bin/env python3


 def conversations(text):
    '''Yield the double-quoted spans found in the string text.

    Untested variant of conversationsForLines that walks one string
    containing newlines instead of an iterable over lines; kept because
    it might prove useful for other cases.

    Scanning only becomes active at a '"' that is the first character of
    text or directly follows a newline.  While active, each pair of
    quotes is yielded including the quotes.  A newline (or the last
    character) reached outside a span deactivates scanning again; one
    reached inside a span and directly preceded by a newline drops the
    span and prints a warning.
    '''
    # XXX paragraph is not what you think it is!
    paraStart = -1  # start of paragraph containing conversations
    convStart = -1  # opening quote
    lastPos = len(text) - 1
    for pos, char in enumerate(text):
        if paraStart == -1:
            # inactive: only a quote at the start of a line wakes us up
            if char == '"' and (pos == 0 or text[pos - 1] == '\n'):
                paraStart = convStart = pos
            continue
        if char == '"':
            if convStart == -1:
                convStart = pos
            else:
                yield text[convStart:pos + 1].rstrip('\n')
                convStart = -1
        if char != '\n' and pos != lastPos:
            continue
        # end of a line, or end of the whole text
        if convStart == -1:
            paraStart = -1
        elif text[pos - 1] == '\n':
            print('WARNING paragraph ended while in conversation!')
            paraStart = -1
            convStart = -1


 def conversationsForLines(textLines):
    '''Yield the double-quoted spans found in an iterable of lines.

    textLines -- iterable of strings, one per line, without the
    trailing newline.

    A line is only scanned when its first character is '"' or when a
    span opened on an earlier line is still unfinished.  Within a
    scanned line every pair of quotes is yielded including the quotes,
    so several conversations on one line are all reported.  A span that
    continues on the next line is joined with '\\n'.  An empty line
    reached inside a span drops the span and prints a warning.
    '''
    # XXX paragraph is not what you think it is!
    convStart = -1  # opening quote
    record = ''  # text of an unfinished span carried over from earlier lines
    for line in textLines:
        remaining = 0
        if record:
            if line:
                record += '\n'
                convStart = 0  # start within this line
            else:
                print('WARNING paragraph ended while in conversation!')
                record = ''
                convStart = -1
        elif line and line[0] == '"':
            convStart = 0
            remaining = 1  # the opening quote itself is already handled
        else:
            # conversation has to start at the line start or follow
            # another conversation within the line
            # otherwise ignore (robustness decision)
            continue
        for pos in range(remaining, len(line)):
            if line[pos] != '"':
                continue
            if convStart == -1:
                # a further conversation following an earlier one on
                # this line; the previous early "continue" on
                # convStart == -1 made this branch unreachable
                convStart = pos
            else:
                yield record + line[convStart:pos + 1]
                record = ''
                convStart = -1
        if convStart != -1:
            # span still open at the end of the line: carry it over
            record += line[convStart:]


 def read(*args, **kwargs):
    '''Yield successive results of input() until it raises EOFError.

    All arguments are handed to every input() call unchanged.
    '''
    try:
        while True:
            yield input(*args, **kwargs)
    except EOFError:
        # end of input simply ends the generator
        return


 if __name__ == '__main__':
    # Print each conversation found on standard input as it is produced.
    stdinLines = read()
    for conversation in conversationsForLines(stdinLines):
        print(conversation)
	#! /usr/bin/env python3
	from itertools import islice
	from find_conversations import read


	class Census:
	    '''Frequency table counting how often each item was added.

	    The counts live in the plain dict ``data``.  When a ``represent``
	    callable is supplied, every item handed to ``add`` or to the
	    mapping-style methods is first translated by it, so items that
	    share a representative share one counter.
	    '''

	    def __init__(self, represent=None):
	        '''Create an empty census.

	        represent -- optional callable mapping an item to the key it
	        is counted under; None counts each item under itself.
	        '''
	        self.data = {}
	        self.represent = represent

	    def _key(self, item):
	        '''Return the key of ``data`` under which item is counted.'''
	        if self.represent is None:
	            return item
	        return self.represent(item)

	    def __contains__(self, key):
	        '''Tell whether key (after translation) has a counter.'''
	        return self._key(key) in self.data

	    def __getitem__(self, key):
	        '''Return the count for key; raises KeyError if never counted.'''
	        return self.data[self._key(key)]

	    def __setitem__(self, key, value):
	        '''Overwrite the count stored for key.'''
	        self.data[self._key(key)] = value

	    def add(self, item, count=1):
	        '''Increase the counter of item by count, creating it if needed.'''
	        key = self._key(item)  # translate once instead of per lookup
	        self.data[key] = self.data.get(key, 0) + count

	    def count(self, items):
	        '''Add every element of the iterable items once.'''
	        for item in items:
	            self.add(item)

	    def __len__(self):
	        '''Return the sum of all counts (not the number of keys).'''
	        return sum(self.data.values())

	    def min(self):
	        '''Return the smallest stored key; ValueError when empty.'''
	        return min(self.data)

	    def max(self):
	        '''Return the largest stored key; ValueError when empty.'''
	        return max(self.data)

	    def iterByCount(self):
	        '''Yield the stored keys from highest to lowest count.

	        Counts are looked up in ``data`` directly: its keys have
	        already been through ``represent``, and translating them a
	        second time fails or hits the wrong counter unless
	        ``represent`` happens to be idempotent.  Keys with equal
	        counts keep the iteration order of ``data`` (the sort is
	        stable).  The ranking is computed when the first key is
	        requested.
	        '''
	        yield from sorted(
	            self.data, key=self.data.__getitem__, reverse=True)


	if __name__ == '__main__':
	    # Count every character of every line read from standard input.
	    charsCensus = Census()
	    for line in read():
	        charsCensus.count(line)
	    # Summary line: total characters counted plus the code point range.
	    print('of {}, range: [{},{}]'.format(
	        len(charsCensus), ord(charsCensus.min()), ord(charsCensus.max()),
	    ))
	    # Only the first ten keys of the by-count ranking are consumed.
	    print(
	        'most frequent characters: '
	        + repr(list(islice(charsCensus.iterByCount(), 0, 10))))
	#! /usr/bin/env python3


	def conversations(text):
	    '''Yield the double-quoted spans found in the string text.

	    Untested variant of conversationsForLines that walks one string
	    containing newlines instead of an iterable over lines; kept
	    because it might prove useful for other cases.

	    Scanning only becomes active at a '"' that is the first character
	    of text or directly follows a newline.  While active, each pair of
	    quotes is yielded including the quotes.  A newline (or the last
	    character) reached outside a span deactivates scanning again; one
	    reached inside a span and directly preceded by a newline drops the
	    span and prints a warning.
	    '''
	    # XXX paragraph is not what you think it is!
	    paraStart = -1  # start of paragraph containing conversations
	    convStart = -1  # opening quote
	    for pos in range(len(text)):
	        if paraStart == -1:
	            # inactive: only a quote at the start of a line wakes us up
	            if text[pos] == '"' and (pos == 0 or text[pos - 1] == '\n'):
	                paraStart = pos
	                convStart = pos
	        else:
	            if text[pos] == '"':
	                if convStart == -1:
	                    convStart = pos
	                else:
	                    yield text[convStart:pos + 1].rstrip('\n')
	                    convStart = -1
	            # end of a line, or end of the whole text
	            if text[pos] == '\n' or pos == len(text) - 1:
	                if convStart == -1:
	                    paraStart = -1
	                else:
	                    if text[pos - 1] == '\n':
	                        print('WARNING paragraph ended while in conversation!')
	                        paraStart = -1
	                        convStart = -1


	def conversationsForLines(textLines):
	    '''Yield the double-quoted spans found in an iterable of lines.

	    textLines -- iterable of strings, one per line, without the
	    trailing newline.

	    A line is only scanned when its first character is '"' or when a
	    span opened on an earlier line is still unfinished.  Within a
	    scanned line every pair of quotes is yielded including the quotes,
	    so several conversations on one line are all reported.  A span
	    that continues on the next line is joined with '\\n'.  An empty
	    line reached inside a span drops the span and prints a warning.
	    '''
	    # XXX paragraph is not what you think it is!
	    convStart = -1  # opening quote
	    record = ''  # text of an unfinished span carried over from earlier lines
	    for line in textLines:
	        remaining = 0
	        if record:
	            if line:
	                record += '\n'
	                convStart = 0  # start within this line
	            else:
	                print('WARNING paragraph ended while in conversation!')
	                record = ''
	                convStart = -1
	        elif line and line[0] == '"':
	            convStart = 0
	            remaining = 1  # the opening quote itself is already handled
	        else:
	            # conversation has to start at the line start or follow
	            # another conversation within the line
	            # otherwise ignore (robustness decision)
	            continue
	        for pos in range(remaining, len(line)):
	            if line[pos] != '"':
	                continue
	            if convStart == -1:
	                # a further conversation following an earlier one on
	                # this line; the previous early "continue" on
	                # convStart == -1 made this branch unreachable
	                convStart = pos
	            else:
	                yield record + line[convStart:pos + 1]
	                record = ''
	                convStart = -1
	        if convStart != -1:
	            # span still open at the end of the line: carry it over
	            record += line[convStart:]


	def read(*args, **kwargs):
	    '''Yield successive results of input() until it raises EOFError.

	    All arguments are handed to every input() call unchanged.  The
	    signature takes ``*args, **kwargs`` so that a bare ``read()``
	    call works; a required positional parameter made it fail.
	    '''
	    while True:
	        try:
	            yield input(*args, **kwargs)
	        except EOFError:
	            # end of input simply ends the generator
	            return


	if __name__ == '__main__':
	    # Print each conversation found on standard input as it is produced.
	    for conversation in conversationsForLines(read()):
	        print(conversation)