prio101 · September 14, 2024 13:54
diff --git a/python b/python
 from langdetect import detect_langs
 from collections import defaultdict

 def split_english_bangla(mixed_string):
    """Split a mixed English/Bangla string into per-language text.

    The input is split on whitespace and every token is classified on
    its own with ``detect_langs``: a token whose top-ranked language is
    ``'bn'`` goes to the Bangla bucket, any other detected language is
    folded into ``'en'``, and a token for which detection raises is
    filed under ``'unknown'``.

    Args:
        mixed_string: Text containing English and/or Bangla words.

    Returns:
        A plain ``dict`` mapping ``'bn'``, ``'en'`` and/or ``'unknown'``
        to the matching tokens joined by single spaces, in their
        original order. Only buckets that received at least one token
        are present, so an empty or whitespace-only input yields ``{}``.
    """
    # One list of tokens per bucket, joined once at the end instead of
    # growing strings with += inside the loop.
    buckets = defaultdict(list)

    for word in mixed_string.split():
        try:
            # detect_langs ranks its candidates; index 0 is the most
            # confident one.
            lang = detect_langs(word)[0].lang
        except Exception:
            # Best effort: a token that cannot be classified is kept
            # under 'unknown' rather than aborting the whole split.
            # Catching Exception (not a bare except) lets Ctrl-C and
            # SystemExit propagate.
            buckets['unknown'].append(word)
            continue

        # Only English ('en') and Bangla ('bn') are reported; every
        # non-Bangla detection is treated as English.
        buckets['bn' if lang == 'bn' else 'en'].append(word)

    return {lang: " ".join(words) for lang, words in buckets.items()}

 # Sample input that interleaves English and Bangla sentences
 mixed_string = "Hello world! এই পৃথিবী সুন্দর। How are you? তুমি কেমন আছো?"

 # Bucket the sample's words by language ...
 result = split_english_bangla(mixed_string)

 # ... and show the resulting language -> text mapping
 print(result)
	from langdetect import detect_langs
	from collections import defaultdict

	def split_english_bangla(mixed_string):
	    """Split a mixed English/Bangla string into per-language text.

	    The input is split on whitespace and every token is classified on
	    its own with ``detect_langs``: a token whose top-ranked language is
	    ``'bn'`` goes to the Bangla bucket, any other detected language is
	    folded into ``'en'``, and a token for which detection raises is
	    filed under ``'unknown'``.

	    Args:
	        mixed_string: Text containing English and/or Bangla words.

	    Returns:
	        A plain ``dict`` mapping ``'bn'``, ``'en'`` and/or ``'unknown'``
	        to the matching tokens joined by single spaces, in their
	        original order. Only buckets that received at least one token
	        are present, so an empty or whitespace-only input yields ``{}``.
	    """
	    # One list of tokens per bucket, joined once at the end instead of
	    # growing strings with += inside the loop.
	    buckets = defaultdict(list)

	    for word in mixed_string.split():
	        try:
	            # detect_langs ranks its candidates; index 0 is the most
	            # confident one.
	            lang = detect_langs(word)[0].lang
	        except Exception:
	            # Best effort: a token that cannot be classified is kept
	            # under 'unknown' rather than aborting the whole split.
	            # Catching Exception (not a bare except) lets Ctrl-C and
	            # SystemExit propagate.
	            buckets['unknown'].append(word)
	            continue

	        # Only English ('en') and Bangla ('bn') are reported; every
	        # non-Bangla detection is treated as English.
	        buckets['bn' if lang == 'bn' else 'en'].append(word)

	    return {lang: " ".join(words) for lang, words in buckets.items()}

	# Sample input that interleaves English and Bangla sentences
	mixed_string = "Hello world! এই পৃথিবী সুন্দর। How are you? তুমি কেমন আছো?"

	# Bucket the sample's words by language ...
	result = split_english_bangla(mixed_string)

	# ... and show the resulting language -> text mapping
	print(result)
No results found