Last active
February 27, 2017 04:37
-
-
Save bamanzi/1e83be7a830ab5ae36c6 to your computer and use it in GitHub Desktop.
add 'pinyin' matching method to percol
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This adds a new matching method, 'pinyin', to percol (https://github.com/mooz/percol).
# In this matching method, the first letter of each Chinese character's pinyin is used for matching.
# For example, 'zw' matches '中文' (ZhongWen), '中午' (ZhongWu), '作为' (ZuoWei), etc.
# Usage:
#   percol --match-method pinyin SomeFileContainingChinese.txt
class FinderMultiQueryPinyin(FinderMultiQuery):
    """Multi-query finder that matches against pinyin initials.

    Both the query and each candidate line are converted with
    ``pinyin.get_initial(text, "")`` before a plain substring search is
    performed, so a query such as ``zw`` can match Chinese text whose
    pinyin initials are ``z`` and ``w``.

    The third-party ``pinyin`` package is imported lazily inside each
    method; if it is missing or the conversion fails, the methods fall
    back to returning ``None`` instead of raising.
    """

    def get_name(self):
        """Return the name of this matching method ("pinyin")."""
        return "pinyin"

    def transform_query(self, needle):
        """Convert a query string to its pinyin initials.

        Returns the converted string, or ``None`` when the ``pinyin``
        package cannot be imported or the conversion raises.
        """
        try:
            import pinyin
            return pinyin.get_initial(needle, "")
        except Exception:
            # Best effort: only ordinary errors are swallowed, so that
            # KeyboardInterrupt / SystemExit still propagate.
            return None

    def find_query(self, needle, haystack):
        """Find every non-overlapping occurrence of ``needle`` in ``haystack``.

        ``haystack`` is first converted to its pinyin initials and the
        search runs on that converted string.

        Returns a list of ``(start, length)`` tuples, where ``start`` is an
        index into the converted string and ``length`` is ``len(needle)``.
        The list is empty when nothing matches or ``needle`` is empty.
        Returns ``None`` when the ``pinyin`` package cannot be imported or
        an error occurs during conversion or search.

        NOTE(review): the positions are offsets in the initials string;
        this presumably lines up with ``haystack`` only if ``get_initial``
        yields exactly one character per input character -- confirm
        against the installed ``pinyin`` version.
        """
        if not needle:
            # str.find("", start) always returns ``start``, so the loop
            # below would never advance; an empty needle matches nothing.
            return []
        try:
            import pinyin
            haystack_py = pinyin.get_initial(haystack, "")
            needle_len = len(needle)
            start = 0
            result = []
            while True:
                found = haystack_py.find(needle, start)
                if found < 0:
                    break
                result.append((found, needle_len))
                # Resume after this match so occurrences never overlap.
                start = found + needle_len
            return result
        except Exception:
            # Best effort, as in transform_query: report "no result"
            # rather than crashing the caller.
            return None
Hi bamanzi, I am a Python newbie and I have a question; I think you are the only person who could help me (please!).
If I have an array like this:
arr = [1, 2, 3, 4]
and I want to call the percol library directly from my Python program,
how do I list it in percol from Python?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
and we need to add some lines into
cli.py