msukmanowsky · July 22, 2019 20:58
diff --git a/guess_string_separator.py b/guess_string_separator.py
 import re
 from collections import Counter
 from typing import Iterable, Optional

 def guess_string_separator(
    string: str,
    to_ignore: Iterable[str] = tuple(),
    ignore_empty_strings: bool = True,
 ) -> Optional[str]:
    """Guess the delimiter used to join a string, using term frequencies.

    The string is split on runs of regex word characters and the pieces
    found between the words are counted. The most frequent piece wins,
    provided it occurs more than once; ties go to the piece seen first.

    E.g. "mike, john, bob" -> ", "

    Args:
        string: Text whose separator should be guessed.
        to_ignore: Candidate separators that must never be returned. May be
            any iterable of strings, including a one-shot iterator.
        ignore_empty_strings: When true, drop candidates that are empty or
            consist only of whitespace, so a bare-whitespace separator is
            never reported.

    Returns:
        The most common repeated non-word piece, or ``None`` when no
        candidate occurs at least twice.
    """
    candidates = re.split(r"\w+", string)
    if ignore_empty_strings:
        # get rid of whitespace-only and empty strings
        candidates = [part for part in candidates if part.strip()]
    # Materialize ``to_ignore`` once: it is tested against every candidate,
    # and a one-shot iterator would be exhausted by the first test. A bare
    # ``str`` is left alone so it keeps its substring-containment behaviour.
    ignored = to_ignore if isinstance(to_ignore, str) else frozenset(to_ignore)
    if ignored:
        candidates = [part for part in candidates if part not in ignored]
    # Only the single best candidate matters, so avoid sorting every count.
    top = Counter(candidates).most_common(1)
    if top and top[0][1] > 1:
        return top[0][0]
    return None
	import re
	from collections import Counter

	def guess_string_separator(
	    string: str,
	    to_ignore: Iterable[str] = tuple(),
	    ignore_empty_strings: bool = True,
	) -> Optional[str]:
	    """Guess the delimiter used to join a string, using term frequencies.

	    The string is split on runs of regex word characters and the pieces
	    found between the words are counted. The most frequent piece wins,
	    provided it occurs more than once; ties go to the piece seen first.

	    E.g. "mike, john, bob" -> ", "

	    Args:
	        string: Text whose separator should be guessed.
	        to_ignore: Candidate separators that must never be returned. May
	            be any iterable of strings, including a one-shot iterator.
	        ignore_empty_strings: When true, drop candidates that are empty
	            or consist only of whitespace, so a bare-whitespace separator
	            is never reported.

	    Returns:
	        The most common repeated non-word piece, or ``None`` when no
	        candidate occurs at least twice.
	    """
	    candidates = re.split(r"\w+", string)
	    if ignore_empty_strings:
	        # get rid of whitespace-only and empty strings
	        candidates = [part for part in candidates if part.strip()]
	    # Materialize ``to_ignore`` once: it is tested against every candidate,
	    # and a one-shot iterator would be exhausted by the first test. A bare
	    # ``str`` is left alone so it keeps its substring-containment behaviour.
	    ignored = to_ignore if isinstance(to_ignore, str) else frozenset(to_ignore)
	    if ignored:
	        candidates = [part for part in candidates if part not in ignored]
	    # Only the single best candidate matters, so avoid sorting every count.
	    top = Counter(candidates).most_common(1)
	    if top and top[0][1] > 1:
	        return top[0][0]
	    return None
No results found