Eligijus112 · April 22, 2022 19:58
diff --git a/text_preprocesing_embed b/text_preprocesing_embed
 import re

 def clean_text(
     string: str,
     punctuations: str = r'''!()-[]{};:'"\,<>./?@#$%^&*_~''',
     stop_words=('the', 'a', 'and', 'is', 'be', 'will')) -> str:
     """
     Normalise a piece of raw text into lowercase, space-separated tokens.

     The steps run in this order:

     1. URLs are removed (anything starting with ``http://``, ``https://``
        or ``www.`` up to the next whitespace character).
     2. ``<...>`` spans that open and close on the same line are removed.
     3. Every character listed in ``punctuations`` is deleted.
     4. The text is lowercased.
     5. Tokens equal to an entry of ``stop_words`` are dropped.
     6. The remaining tokens are joined with single spaces.

     Args:
         string: The text to clean.
         punctuations: Characters to delete from the text.
         stop_words: Words to drop. They are compared against the already
             lowercased tokens, so entries should be lowercase.

     Returns:
         The cleaned text, with no leading, trailing or repeated whitespace.
     """
     # Cleaning the urls
     string = re.sub(r'https?://\S+|www\.\S+', '', string)

     # Cleaning the html elements
     string = re.sub(r'<.*?>', '', string)

     # Removing the punctuations in a single pass. Membership is tested on
     # the original characters, so exactly the listed characters are removed
     # regardless of their case.
     unwanted = set(punctuations)
     string = ''.join(char for char in string if char not in unwanted)

     # Converting the text to lower
     string = string.lower()

     # Removing stop words. split() with no argument already discards
     # leading/trailing whitespace and collapses runs of it, so joining on a
     # single space leaves nothing further to normalise.
     excluded = set(stop_words)
     return ' '.join(word for word in string.split() if word not in excluded)
	import re

	def clean_text(
	    string: str,
	    punctuations: str = r'''!()-[]{};:'"\,<>./?@#$%^&*_~''',
	    stop_words=('the', 'a', 'and', 'is', 'be', 'will')) -> str:
	    """
	    Normalise a piece of raw text into lowercase, space-separated tokens.

	    The steps run in this order:

	    1. URLs are removed (anything starting with ``http://``, ``https://``
	       or ``www.`` up to the next whitespace character).
	    2. ``<...>`` spans that open and close on the same line are removed.
	    3. Every character listed in ``punctuations`` is deleted.
	    4. The text is lowercased.
	    5. Tokens equal to an entry of ``stop_words`` are dropped.
	    6. The remaining tokens are joined with single spaces.

	    Args:
	        string: The text to clean.
	        punctuations: Characters to delete from the text.
	        stop_words: Words to drop. They are compared against the already
	            lowercased tokens, so entries should be lowercase.

	    Returns:
	        The cleaned text, with no leading, trailing or repeated whitespace.
	    """
	    # Cleaning the urls. The `|` must stay unescaped: it is the alternation
	    # between the scheme-prefixed form and the bare "www." form. Escaped, it
	    # would only match a literal pipe character and no URL would be removed.
	    string = re.sub(r'https?://\S+|www\.\S+', '', string)

	    # Cleaning the html elements
	    string = re.sub(r'<.*?>', '', string)

	    # Removing the punctuations in a single pass. Membership is tested on
	    # the original characters, so exactly the listed characters are removed
	    # regardless of their case.
	    unwanted = set(punctuations)
	    string = ''.join(char for char in string if char not in unwanted)

	    # Converting the text to lower
	    string = string.lower()

	    # Removing stop words. split() with no argument already discards
	    # leading/trailing whitespace and collapses runs of it, so joining on a
	    # single space leaves nothing further to normalise.
	    excluded = set(stop_words)
	    return ' '.join(word for word in string.split() if word not in excluded)