Created
September 21, 2024 23:13
-
-
Save planetis-m/ad078e0e184439e2712f3b853c192d01 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections.abc import Iterable
from functools import lru_cache
import hashlib
import re

from prompt_toolkit import PromptSession
from prompt_toolkit.application.current import get_app
from prompt_toolkit.completion import Completer, Completion
from prompt_toolkit.document import Document
from prompt_toolkit.filters import Condition
from prompt_toolkit.formatted_text import FormattedText, fragment_list_to_text
from prompt_toolkit.layout.processors import Processor, Transformation, TransformationInput, ConditionalProcessor
from prompt_toolkit.layout.utils import explode_text_fragments
from spellchecker import SpellChecker
# Shared spell checker instance. Both the completer and the underline
# processor below read this module-level object, so the dictionary is
# loaded only once.
spell = SpellChecker()
# Custom Completer for spellcheck suggestions
class SpellCheckCompleter(Completer):
    """Offer spelling corrections for the word immediately before the cursor.

    Suggestions come from the module-level ``spell`` checker and are memoized
    per instance in ``self.cache`` (an ``lru_cache`` wrapper), so retyping or
    re-rendering the same word does not query the dictionary again.

    Args:
        cache_size: Maximum number of distinct words whose suggestions are
            remembered by the LRU cache.
    """

    def __init__(self, cache_size=128):
        super().__init__()
        # Per-instance LRU cache keyed on the word being completed.
        self.cache = lru_cache(maxsize=cache_size)(self._get_suggestions)

    def _get_suggestions(self, word: str) -> tuple[str, ...]:
        """Return the corrections for *word*, sorted, excluding *word* itself.

        ``spell.candidates`` may return ``None`` (no candidates) or a set that
        contains only the word itself (the word is already known). Neither is
        a useful completion, so both cases yield an empty tuple. The result is
        an immutable tuple because it is shared between cache hits.
        """
        candidates = spell.candidates(word) or ()
        folded = word.lower()
        # Sorting gives the completion menu a stable order; a raw set does not.
        return tuple(sorted(c for c in candidates if c.lower() != folded))

    def get_completions(self, document: Document, complete_event) -> Iterable[Completion]:
        """Yield a ``Completion`` for every suggested correction.

        Only purely alphabetic words are checked. Each completion replaces the
        whole word before the cursor (``start_position=-len(word)``).
        """
        word = document.get_word_before_cursor()
        if not word or not word.isalpha():
            return
        for suggestion in self.cache(word):
            yield Completion(suggestion, start_position=-len(word))
# Custom Processor to underline misspelled words
class UnderlineMisspelledProcessor(Processor):
    """Underline words that the module-level ``spell`` checker does not know.

    HTML tags, URLs and e-mail addresses are matched by the same pattern as
    ordinary words but are never spell-checked. Spell-check results are
    memoized per instance in ``self.cache``.

    Args:
        cache_size: Maximum number of distinct words whose spell-check result
            is remembered by the LRU cache.
    """

    def __init__(self, cache_size=128):
        super().__init__()
        # Regex pattern to match words while ignoring HTML tags, email
        # addresses, and URLs. Only the last alternative captures (group 1),
        # so group 1 is None for everything that must not be checked.
        self.word_pattern = re.compile(r'''
            (?:<[^>]+>)|              # HTML tags
            (?:https?://|www\.)\S+|   # Simplified URL pattern
            (?:\S+@\S+\.\S+)|         # Simplified email pattern
            (\b\w+\b)                 # Words
        ''', re.VERBOSE)
        self.cache = lru_cache(maxsize=cache_size)(self._is_misspelled)

    def _is_misspelled(self, word: str) -> bool:
        """Return True when the spell checker reports *word* as unknown."""
        return bool(spell.unknown([word]))

    def apply_transformation(self, ti: TransformationInput) -> Transformation:
        """Add the ``underline`` style to every misspelled word of one line.

        The transformation works on ``ti.fragments`` (the fragments of the
        line being rendered) rather than on the whole document text, so
        multi-line buffers are handled line by line. Styles already attached
        to the fragments are kept: ``underline`` is appended to the existing
        style string instead of replacing it. The text itself is never
        changed, so no cursor position mapping is required.
        """
        line_text = fragment_list_to_text(ti.fragments)
        if not line_text:
            return Transformation(ti.fragments)

        # One fragment per character, so regex offsets index fragments directly.
        fragments = explode_text_fragments(ti.fragments)

        for match in self.word_pattern.finditer(line_text):
            word = match.group(1)  # None for tags, URLs and e-mail addresses
            if not word or not word.isalpha() or not self.cache(word):
                continue
            for index in range(match.start(1), match.end(1)):
                style, char, *rest = fragments[index]
                fragments[index] = (style + ' underline ', char, *rest)

        return Transformation(fragments)
# Digest of the buffer text seen by the most recent buffer_has_changed() call;
# None until that function has run for the first time.
last_text_hash = None
# Function to hash the document text
def hash_text(text: str) -> str:
    """Return the hexadecimal MD5 digest of *text* encoded as UTF-8.

    The digest is only a cheap fingerprint for detecting text changes, not a
    security measure. ``usedforsecurity=False`` states that explicitly, which
    keeps the call working on FIPS-restricted Python builds where plain
    ``hashlib.md5`` is refused.
    """
    return hashlib.md5(text.encode("utf-8"), usedforsecurity=False).hexdigest()
def buffer_has_changed():
    """Report whether the prompt buffer text differs from the last call.

    Hashes ``session.default_buffer.text`` and compares the digest with the
    module-level ``last_text_hash``, storing the new digest when they differ.

    NOTE: the answer depends on call history. Only the first call after an
    edit returns True; every later call with the same text returns False.
    Used as the filter of a ``ConditionalProcessor``, this switches the
    processor off whenever the filter is evaluated again without an edit in
    between, which is presumably why underlining disappears with it.
    """
    global last_text_hash
    digest = hash_text(session.default_buffer.text)
    if digest == last_text_hash:
        return False
    last_text_hash = digest
    return True
# Example usage
# The underline processor is attached unconditionally. A filter that is true
# only right after an edit would switch the processor off on every other
# evaluation and the underlines would vanish; the processor's own LRU cache
# already makes repeated runs over unchanged text cheap.
session = PromptSession(
    completer=SpellCheckCompleter(),
    input_processors=[UnderlineMisspelledProcessor()],
)

if __name__ == "__main__":
    while True:
        try:
            text = session.prompt('Enter text: ', complete_while_typing=True)
        except (KeyboardInterrupt, EOFError):
            # Ctrl-C or Ctrl-D ends the demo without a traceback.
            break
        print(f"You entered: {text}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment