Last active
September 5, 2022 09:41
-
-
Save ahopkins/c1533a27c8db5d656a9bcb623b75d1ec to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import annotations | |
import re | |
from dataclasses import dataclass, field | |
from typing import Iterator, List, Optional | |
from urllib.parse import urlencode | |
from rich import print | |
@dataclass(frozen=True)
class Expression:
    """One parsed search term: a field, an operator, and one or more values.

    Instances are normally created through :meth:`build` from a regex match
    that exposes the named groups ``field``, ``operator``, ``value`` and
    ``remainder``.
    """

    # Splits a raw value string into individual values. Each value is either
    # a double-quoted run or a run of word characters, optionally preceded by
    # a ``|``. Because the unquoted form is ``\w+``, any non-word character
    # in an unquoted value also ends that value.
    VALUE_PATTERN = re.compile(r"\|?(?P<value>\"(?:[^\"]+)\"|(?:\w+))")
    # Used when the match carries no field / no operator.
    DEFAULT_FIELD = "*"
    DEFAULT_OPERATOR = "=="

    field: str
    operator: str
    values: List[str]

    @classmethod
    def build(cls, match: re.Match) -> Optional[Expression]:
        """Create an expression from a regex match, or ``None`` if it is empty.

        Args:
            match: A match whose named groups may include ``field``,
                ``operator``, ``value`` and ``remainder``. Groups that are
                absent or did not participate in the match are treated as
                empty.

        Returns:
            An ``Expression`` built from the ``value`` group (split with
            ``VALUE_PATTERN``) or, failing that, from the ``remainder`` group
            as a single value. ``None`` when neither yields any text once
            surrounding quotes are stripped.
        """
        groups = match.groupdict()
        raw_value = groups.get("value")

        if raw_value:
            values = [
                found.group("value").strip("\"'")
                for found in cls.VALUE_PATTERN.finditer(raw_value)
            ]
        else:
            # A group that did not participate in the match is None, so
            # normalise to "" before stripping instead of raising
            # AttributeError on None.
            remainder = (groups.get("remainder") or "").strip("\"'")
            if not remainder:
                return None
            values = [remainder]

        return cls(
            field=groups.get("field") or cls.DEFAULT_FIELD,
            operator=groups.get("operator") or cls.DEFAULT_OPERATOR,
            values=values,
        )
@dataclass(frozen=True)
class Query:
    """An ordered collection of ``Expression`` objects parsed from a string."""

    # Two alternatives:
    #   1. ``<field>:<operator><value>`` where field is one of the listed
    #      names, operator is up to two of ``= ~ > < !``, and value is either
    #      a double-quoted run or text up to the next whitespace / end.
    #   2. anything else, captured as ``remainder`` (a double-quoted run or a
    #      run of non-whitespace characters).
    QUERY_PATTERN = re.compile(
        r"(?P<field>one|two|three|four)(?::(?P<operator>[=~><!]{0,2})"
        r"(?P<value>(?:\".+\")|(?:.+?(?:\s+|$))))|(?P<remainder>(?:\".+\")|(?:[^\s]+))"
    )

    expressions: List[Expression] = field(default_factory=list)

    @classmethod
    def parse(cls, query: str) -> Query:
        """Turn a raw query string into a ``Query``.

        A string containing neither ``:`` nor ``"`` is wrapped in double
        quotes first, so it is matched as one quoted remainder. Every pattern
        match is handed to ``Expression.build`` and falsy results are
        dropped. If nothing survives, the (possibly quote-wrapped) text
        becomes the single value of a default-field, default-operator
        expression.
        """
        uses_syntax = ":" in query or '"' in query
        text = query if uses_syntax else f'"{query}"'

        parsed = []
        for hit in cls.QUERY_PATTERN.finditer(text):
            expression = Expression.build(hit)
            if expression:
                parsed.append(expression)

        if parsed:
            return cls(expressions=parsed)

        fallback = Expression(
            Expression.DEFAULT_FIELD,
            Expression.DEFAULT_OPERATOR,
            [text],
        )
        return cls(expressions=[fallback])
# Demo: parse a handful of sample queries and print each one alongside its
# URL-encoded form and the resulting Query object.
_SAMPLE_QUERIES = (
    "search stuff",
    "one:thing",
    '"search everything"',
    '"search partial" stuff',
    "one:item search stuff",
    'one:thing two:~"This has | a bar" three:<6 four:foo|bar',
)

for raw in _SAMPLE_QUERIES:
    query = Query.parse(raw)
    # ``{raw=}`` prints both the variable name and its repr.
    print(f"\nPARSING: {raw=}", urlencode({"q": raw}))
    print(query)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment