Last active
September 23, 2016 03:04
-
-
Save sudodo/e37d660a1c037521f53d6c3bdb0d84fe to your computer and use it in GitHub Desktop.
Detect whether a string is a nonsense word.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re

import mojimoji
# Half-width ASCII symbol ranges: '!'..'/', ':'..'@', '['..'`' and '{'..'~'.
_SYMBOL_PATTERN = re.compile("[!-/:-@[-`{-~]")
# An ASCII digit in the first position of the word.
_LEADING_DIGIT_PATTERN = re.compile("^[0-9]")


def is_non_sense_word(word):
    """Return True if *word* looks like a nonsense token such as ``a@~``.

    The word is first passed through ``mojimoji.zen_to_han(..., kana=False)``
    (full-width to half-width conversion, leaving kana as-is) so that
    full-width symbols and digits are judged the same way as their ASCII
    counterparts.

    Args:
        word: The token to inspect. Either UTF-8 encoded bytes (the only
            form originally supported) or already-decoded text.

    Returns:
        True if the normalized word contains any half-width ASCII symbol or
        starts with an ASCII digit; False otherwise.
    """
    # Decode only byte strings: calling .decode() on already-decoded text
    # fails (implicit ASCII encode on Python 2, AttributeError on Python 3).
    if isinstance(word, bytes):
        word = word.decode("utf-8")
    hankaku_word = mojimoji.zen_to_han(word, kana=False)

    # Any word containing a half-width symbol is rejected.
    if _SYMBOL_PATTERN.search(hankaku_word) is not None:
        return True

    # A leading digit rejects the word; this also covers all-digit words.
    return _LEADING_DIGIT_PATTERN.match(hankaku_word) is not None
# Ad-hoc self-check: one line is printed per sample, and it reads True when
# is_non_sense_word() returned the verdict paired with that sample.
_SAMPLES = (
    ("あいうえお漢字カナ、", False),
    ("あいうえおabc", False),
    ("あいうえお@", True),
    ("あいうえお#$", True),
    ("あいうえお1", False),
    ("11", True),
    ("230", True),
    ("1あいうえお1", True),
    ("あいうえお1", False),
    ("1あいうえお1", True),
)

for _sample, _expected in _SAMPLES:
    print(is_non_sense_word(_sample) == _expected)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment