cengizhancaliskan · November 13, 2019 05:55
diff --git a/has_cyrillic.py b/has_cyrillic.py
 # Reference: https://unicode-table.com/en/blocks/
 # https://docs.oracle.com/cd/E29584_01/webhelp/mdex_basicDev/src/rbdv_chars_mapping.html

 import re

 def has_cyrillic(word):
     """Report whether ``word`` contains a character from the Cyrillic block.

     The pattern spans U+0400-U+04FF, the block described at
     https://unicode-table.com/en/blocks/cyrillic/ and used for Russian,
     Ukrainian and Bulgarian text.

     Returns True when at least one such character is present, else False.
     """
     match = re.search('[\u0400-\u04ff]', word)
     return match is not None

 def has_cyrillic2(word):
     """Report whether ``word`` contains a letter of the Russian alphabet.

     The ranges а-я and А-Я only cover U+0430-U+044F and U+0410-U+042F.
     The letters ё (U+0451) and Ё (U+0401) lie outside both ranges, so they
     are listed explicitly; without them a word made only of those letters
     would be reported as non-Cyrillic.

     Other Cyrillic letters (for example Ukrainian ї) are still not matched.

     Returns True when at least one matching letter is present, else False.
     """
     return bool(re.search('[а-яА-ЯёЁ]', word))

 def has_chinese(word):
     """Report whether ``word`` contains a CJK Unified Ideograph.

     The pattern spans U+4E00-U+9FFF, the block described at
     https://unicode-table.com/en/blocks/cjk-unified-ideographs/ whose
     ideographs appear in Chinese, Japanese, Korean and Vietnamese writing.

     Returns True when at least one such character is present, else False.
     """
     found = re.search('[\u4e00-\u9fff]', word)
     return found is not None

 def get_non_cyrillic(text):
     """Return ``text`` without the words that has_cyrillic() flags.

     ``text`` is split on whitespace; the words that pass are re-joined
     with single spaces.
     """
     kept = []
     for token in text.split():
         if has_cyrillic(token):
             continue
         kept.append(token)
     return ' '.join(kept)

 def get_non_cyrillic2(text):
     """Return ``text`` without the words that has_cyrillic2() flags.

     ``text`` is split on whitespace; the words that pass are re-joined
     with single spaces.
     """
     survivors = filter(lambda token: not has_cyrillic2(token), text.split())
     return ' '.join(survivors)

 def get_non_chinese(text):
     """Return ``text`` without the words that has_chinese() flags.

     ``text`` is split on whitespace; the words that pass are re-joined
     with single spaces.
     """
     tokens = text.split()
     kept = (token for token in tokens if not has_chinese(token))
     return ' '.join(kept)


 # Example usage. Each call returns the input with the flagged words removed;
 # the return values are neither stored nor printed here.
 get_non_cyrillic("yatak белье")  # -> "yatak"
 get_non_cyrillic2("yatak белье")  # -> "yatak"
 get_non_chinese("馬 yata 马 k馬")  # -> "yata" ("k馬" contains an ideograph, so it is dropped)
	# Reference: https://unicode-table.com/en/blocks/
	# https://docs.oracle.com/cd/E29584_01/webhelp/mdex_basicDev/src/rbdv_chars_mapping.html

	import re

	def has_cyrillic(word):
	    """Report whether ``word`` contains a character from the Cyrillic block.

	    The pattern spans U+0400-U+04FF, the block described at
	    https://unicode-table.com/en/blocks/cyrillic/ and used for Russian,
	    Ukrainian and Bulgarian text.

	    Returns True when at least one such character is present, else False.
	    """
	    return bool(re.search('[\u0400-\u04ff]', word))

	def has_cyrillic2(word):
	    """Report whether ``word`` contains a letter of the Russian alphabet.

	    The ranges а-я and А-Я only cover U+0430-U+044F and U+0410-U+042F.
	    The letters ё (U+0451) and Ё (U+0401) lie outside both ranges, so they
	    are listed explicitly; without them a word made only of those letters
	    would be reported as non-Cyrillic.

	    Other Cyrillic letters (for example Ukrainian ї) are still not matched.

	    Returns True when at least one matching letter is present, else False.
	    """
	    return bool(re.search('[а-яА-ЯёЁ]', word))

	def has_chinese(word):
	    """Report whether ``word`` contains a CJK Unified Ideograph.

	    The pattern spans U+4E00-U+9FFF, the block described at
	    https://unicode-table.com/en/blocks/cjk-unified-ideographs/ whose
	    ideographs appear in Chinese, Japanese, Korean and Vietnamese writing.

	    Returns True when at least one such character is present, else False.
	    """
	    return bool(re.search('[\u4e00-\u9fff]', word))

	def get_non_cyrillic(text):
	    """Return ``text`` without the words that has_cyrillic() flags.

	    ``text`` is split on whitespace; the words that pass are re-joined
	    with single spaces.
	    """
	    return ' '.join(word for word in text.split() if not has_cyrillic(word))

	def get_non_cyrillic2(text):
	    """Return ``text`` without the words that has_cyrillic2() flags.

	    ``text`` is split on whitespace; the words that pass are re-joined
	    with single spaces.
	    """
	    return ' '.join(word for word in text.split() if not has_cyrillic2(word))

	def get_non_chinese(text):
	    """Return ``text`` without the words that has_chinese() flags.

	    ``text`` is split on whitespace; the words that pass are re-joined
	    with single spaces.
	    """
	    return ' '.join(word for word in text.split() if not has_chinese(word))


	# Example usage. Each call returns the input with the flagged words removed;
	# the return values are neither stored nor printed here.
	get_non_cyrillic("yatak белье")  # -> "yatak"
	get_non_cyrillic2("yatak белье")  # -> "yatak"
	get_non_chinese("馬 yata 马 k馬")  # -> "yata" ("k馬" contains an ideograph, so it is dropped)