yeiichi · April 15, 2025 08:38
diff --git a/ruby_remover.py b/ruby_remover.py
 #!/usr/bin/env python3
 from bs4 import BeautifulSoup

 # Tag names handed to find_all() when stripping ruby annotations (`<rt>` and `<rp>`).
 RUBY_ANNOTATION_TAGS = [
    "rt",
    "rp",
 ]


 def clean_ruby_annotations(soup: BeautifulSoup) -> BeautifulSoup:
    """
    Strips ruby annotation markup from a parsed document, in place.

    Two passes run over ``soup`` one after the other: `remove_ruby_tags`
    deletes the elements named in `RUBY_ANNOTATION_TAGS` (`<rt>` and `<rp>`),
    then `unwrap_ruby_tags` unwraps every `<ruby>` element so that whatever it
    still contains stays in the document.

    Args:
        soup (BeautifulSoup): The parsed HTML document to clean. It is modified directly.

    Returns:
        BeautifulSoup: The same ``soup`` object that was passed in, after both passes.
    """
    # Same sequence as always: annotations are removed, then wrappers are unwrapped.
    for cleanup_pass in (remove_ruby_tags, unwrap_ruby_tags):
        cleanup_pass(soup)
    return soup


 def remove_ruby_tags(html_doc: BeautifulSoup) -> None:
    """ Deletes every element named in `RUBY_ANNOTATION_TAGS` from the document, in place. """
    annotation_nodes = html_doc.find_all(RUBY_ANNOTATION_TAGS)
    for node in annotation_nodes:
        # decompose() takes the element out of the tree together with its contents.
        node.decompose()


 def unwrap_ruby_tags(html_doc: BeautifulSoup) -> None:
    """ Replaces each `<ruby>` element with its own children, in place. """
    ruby_wrappers = html_doc.find_all('ruby')
    for wrapper in ruby_wrappers:
        # unwrap() drops only the wrapper tag; its children remain where the tag was.
        wrapper.unwrap()
	#!/usr/bin/env python3
	from bs4 import BeautifulSoup

	# Tag names handed to find_all() when stripping ruby annotations (`<rt>` and `<rp>`).
	RUBY_ANNOTATION_TAGS = [
	    "rt",
	    "rp",
	]


	def clean_ruby_annotations(soup: BeautifulSoup) -> BeautifulSoup:
	    """
	    Cleans a BeautifulSoup object by removing ruby-related annotations.

	    This function first calls `remove_ruby_tags` to delete all `<rt>` and
	    `<rp>` elements, then calls `unwrap_ruby_tags` to unwrap `<ruby>`
	    elements so that their remaining content stays in the document. The
	    document is modified in place.

	    Args:
	        soup (BeautifulSoup): The BeautifulSoup object representing a parsed HTML document.

	    Returns:
	        BeautifulSoup: The same object that was passed in, with ruby annotations cleaned.
	    """
	    remove_ruby_tags(soup)
	    unwrap_ruby_tags(soup)
	    return soup


	def remove_ruby_tags(html_doc: BeautifulSoup) -> None:
	    """ Removes `<rt>` and `<rp>` tags from the HTML document, in place. """
	    for annotation_tag in html_doc.find_all(RUBY_ANNOTATION_TAGS):
	        # decompose() takes the tag out of the tree together with its contents.
	        annotation_tag.decompose()


	def unwrap_ruby_tags(html_doc: BeautifulSoup) -> None:
	    """ Unwraps `<ruby>` tags in place, preserving their inner content. """
	    for ruby_tag in html_doc.find_all('ruby'):
	        # unwrap() drops only the `<ruby>` tag itself; its children stay in position.
	        ruby_tag.unwrap()