wolever · October 3, 2022 09:47 · reinoldus · Jun 4, 2020 · Mariownyou · Oct 3, 2022
diff --git a/test_tsquery_escape.py b/test_tsquery_escape.py
 import re
 from nose.tools import assert_equal
 from nose_parameterized import parameterized

 from tsquery_escape import tsquery_escape

 @parameterized([
    ("1 OR 2", "1 | 2"),
    ("(1) 2", "( 1 ) & 2"),
    ("&", "'&':*"),
    ('"hello world"', "'hello world'"),
    ("not 1", "! 1"),
    ("1 and not (2 or 3)", "1 & ! ( 2 | 3 )"),
    ("not and and 1) or ( 2 not", "! 1 | ( 2 )"),
    ("() 1", "1"),
    ("1 2 3", "1 & 2 & 3"),
    ("'&' |", "'&':* & '|':*"),
    (") and 1 (2 or", "1 & ( 2 )"),
    ("it's '", "'its':*"),
    ("(1)", "( 1 )"),
    ("1((", "1"),
 ])
 def test_tsquery_escape(input, expected):
    expected = re.sub("([0-9])", r"'\1':*", expected)
    actual = tsquery_escape(input)
    assert_equal(actual, expected)
diff --git a/tsquery_escape.py b/tsquery_escape.py
 import re

 def tsquery_escape(term):
    """ Escape a query string so it's safe to use with Postgres'
        ``to_tsquery(...)``. Single quotes are ignored, double quoted strings
        are used as literals, and the logical operators 'and', 'or', 'not',
        '(', and ')' can be used:

            >>> tsquery_escape("Hello")
            "'hello':*"
            >>> tsquery_escape('"Quoted string"')
            "'quoted string'"
            >>> tsquery_escape("multiple terms OR another")
            "'multiple':* & 'terms':* | 'another':*"
            >>> tsquery_escape("'\"*|")
            "'\"*|':*"
            >>> tsquery_escape('not foo and (bar or "baz")')
            "! 'foo':* & ( 'bar':* | 'baz' )"
    """

    magic_terms = {
        "and": "&",
        "or": "|",
        "not": "!",
        "OR": "|",
        "AND": "&",
        "NOT": "!",
        "(": "(",
        ")": ")",
    }
    magic_values = set(magic_terms.values())
    paren_count = 0
    res = []
    bits = re.split(r'((?:".*?")|[()])', term)
    for bit in bits:
        if not bit:
            continue
        split_bits = (
            [bit] if bit.startswith('"') and bit.endswith('"') else
            bit.strip().split()
        )
        for bit in split_bits:
            if not bit:
                continue
            if bit in magic_terms:
                bit = magic_terms[bit]
                last = res and res[-1] or ""

                if bit == ")":
                    if last == "(":
                        paren_count -= 1
                        res.pop()
                        continue
                    if paren_count == 0:
                        continue
                    if last in magic_values and last != "(":
                        res.pop()
                elif bit == "|" and last == "&":
                    res.pop()
                elif bit == "!":
                    pass
                elif bit == "(":
                    pass
                elif last in magic_values or not last:
                    continue

                if bit == ")":
                    paren_count -= 1
                elif bit == "(":
                    paren_count += 1

                res.append(bit)
                if bit == ")":
                    res.append("&")
                continue

            bit = bit.replace("'", "")
            if not bit:
                continue

            if bit.startswith('"') and bit.endswith('"'):
                res.append(bit.replace('"', "'"))
            else:
                res.append("'%s':*" %(bit.replace("'", ""), ))
            res.append("&")

    while res and res[-1] in magic_values:
        last = res[-1]
        if last == ")":
            break
        if last == "(":
            paren_count -= 1
        res.pop()
    while paren_count > 0:
        res.append(")")
        paren_count -= 1
    return " ".join(res)
	import re
	from nose.tools import assert_equal
	from nose_parameterized import parameterized

	from tsquery_escape import tsquery_escape

	@parameterized([
	("1 OR 2", "1 \| 2"),
	("(1) 2", "( 1 ) & 2"),
	("&", "'&':*"),
	('"hello world"', "'hello world'"),
	("not 1", "! 1"),
	("1 and not (2 or 3)", "1 & ! ( 2 \| 3 )"),
	("not and and 1) or ( 2 not", "! 1 \| ( 2 )"),
	("() 1", "1"),
	("1 2 3", "1 & 2 & 3"),
	("'&' \|", "'&':* & '\|':*"),
	(") and 1 (2 or", "1 & ( 2 )"),
	("it's '", "'its':*"),
	("(1)", "( 1 )"),
	("1((", "1"),
	])
	def test_tsquery_escape(input, expected):
	expected = re.sub("([0-9])", r"'\1':*", expected)
	actual = tsquery_escape(input)
	assert_equal(actual, expected)
	import re

	def tsquery_escape(term):
	""" Escape a query string so it's safe to use with Postgres'
	``to_tsquery(...)``. Single quotes are ignored, double quoted strings
	are used as literals, and the logical operators 'and', 'or', 'not',
	'(', and ')' can be used:

	>>> tsquery_escape("Hello")
	"'hello':*"
	>>> tsquery_escape('"Quoted string"')
	"'quoted string'"
	>>> tsquery_escape("multiple terms OR another")
	"'multiple':* & 'terms':* \| 'another':*"
	>>> tsquery_escape("'\"*\|")
	"'\"\|':"
	>>> tsquery_escape('not foo and (bar or "baz")')
	"! 'foo':* & ( 'bar':* \| 'baz' )"
	"""

	magic_terms = {
	"and": "&",
	"or": "\|",
	"not": "!",
	"OR": "\|",
	"AND": "&",
	"NOT": "!",
	"(": "(",
	")": ")",
	}
	magic_values = set(magic_terms.values())
	paren_count = 0
	res = []
	bits = re.split(r'((?:".*?")\|[()])', term)
	for bit in bits:
	if not bit:
	continue
	split_bits = (
	[bit] if bit.startswith('"') and bit.endswith('"') else
	bit.strip().split()
	)
	for bit in split_bits:
	if not bit:
	continue
	if bit in magic_terms:
	bit = magic_terms[bit]
	last = res and res[-1] or ""

	if bit == ")":
	if last == "(":
	paren_count -= 1
	res.pop()
	continue
	if paren_count == 0:
	continue
	if last in magic_values and last != "(":
	res.pop()
	elif bit == "\|" and last == "&":
	res.pop()
	elif bit == "!":
	pass
	elif bit == "(":
	pass
	elif last in magic_values or not last:
	continue

	if bit == ")":
	paren_count -= 1
	elif bit == "(":
	paren_count += 1

	res.append(bit)
	if bit == ")":
	res.append("&")
	continue

	bit = bit.replace("'", "")
	if not bit:
	continue

	if bit.startswith('"') and bit.endswith('"'):
	res.append(bit.replace('"', "'"))
	else:
	res.append("'%s':*" %(bit.replace("'", ""), ))
	res.append("&")

	while res and res[-1] in magic_values:
	last = res[-1]
	if last == ")":
	break
	if last == "(":
	paren_count -= 1
	res.pop()
	while paren_count > 0:
	res.append(")")
	paren_count -= 1
	return " ".join(res)