Created
September 29, 2020 17:48
-
-
Save wyl8899/e0f31068681023480e20c34f6b19a275 to your computer and use it in GitHub Desktop.
JavaScript 实现中英文空格 - Implementation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Partial implementation from https://zhuanlan.zhihu.com/p/33612593 */
import _ from 'lodash'; | |
/* Punctuation: any character in the Unicode general category "Punctuation". */
const punctuationRegex = /\p{Punctuation}/u;
/*
 * Whitespace: Unicode separators (space, line and paragraph separators) plus
 * everything `\s` matches. `\p{Separator}` alone does not cover control
 * whitespace such as tab, line feed or carriage return, so those would
 * otherwise be treated as ordinary text.
 */
const spaceRegex = /[\p{Separator}\s]/u;
/* CJK characters (Chinese, Japanese, Korean): Han, Katakana, Hiragana and Hangul scripts. */
const cjkRegex = /\p{Script=Han}|\p{Script=Katakana}|\p{Script=Hiragana}|\p{Script=Hangul}/u;
/**
 * Decides whether a space belongs between two pieces of text.
 *
 * The answer is `true` when exactly one of the two arguments contains a CJK
 * character and the other one contains neither punctuation nor whitespace.
 * The underlying regex tests are unanchored, so every character of each
 * argument is considered; callers that only care about the characters next
 * to the join point should pass just those characters.
 *
 * @param a - One side of the join.
 * @param b - The other side of the join.
 * @returns `true` if a space should be inserted between `a` and `b`.
 */
const shouldSpace = (a: string, b: string): boolean => {
  const hasCjk = (text: string): boolean => cjkRegex.test(text);
  const hasBreaker = (text: string): boolean =>
    punctuationRegex.test(text) || spaceRegex.test(text);

  if (hasCjk(a)) {
    // CJK on the `a` side: space only against plain non-CJK text.
    return !hasBreaker(b) && !hasCjk(b);
  }
  // No CJK in `a`: space only if `b` brings CJK and `a` is plain text.
  return hasCjk(b) && !hasBreaker(a);
};
/**
 * Concatenates `parts` in order, placing a computed separator before every
 * part except the first.
 *
 * @param parts - Strings to concatenate.
 * @param sepFunc - Produces the separator for one junction. It is called as
 *   `sepFunc(current, previous)`: the part about to be appended comes first,
 *   the part that precedes it comes second.
 * @returns The concatenated string; `""` when `parts` is empty.
 */
const join = (
  parts: string[],
  sepFunc: (a: string, b: string) => string,
): string => {
  let result = "";
  // Remember the preceding part instead of indexing `parts[i - 1]`, which
  // keeps the types exact and needs no helper library.
  let previous: string | undefined;
  for (const part of parts) {
    if (previous !== undefined) {
      result += sepFunc(part, previous);
    }
    result += part;
    previous = part;
  }
  return result;
};
/** Returns the first Unicode code point of `text` as a string, or `""` if `text` is empty. */
const firstCodePoint = (text: string): string => {
  const codePoint = text.codePointAt(0);
  return codePoint === undefined ? "" : String.fromCodePoint(codePoint);
};

/** Returns the last Unicode code point of `text` as a string, or `""` if `text` is empty. */
const lastCodePoint = (text: string): string => {
  // If the final two UTF-16 units form a surrogate pair they are one
  // character (e.g. rare Han ideographs) and must be kept together.
  const tail = text.slice(-2);
  const codePoint = tail.codePointAt(0);
  return codePoint !== undefined && codePoint > 0xffff ? tail : text.slice(-1);
};

/**
 * Template tag that joins the literal chunks and substitutions of a template
 * string, inserting a single space at each junction where `shouldSpace`
 * asks for one.
 *
 * Only the two characters that actually meet at a junction are examined
 * (the last character of the preceding piece and the first character of the
 * following piece), so punctuation or whitespace elsewhere in a piece does
 * not influence the decision. Empty pieces are dropped before joining.
 *
 * @param string - The literal chunks of the template.
 * @param subs - The interpolated values.
 * @returns The assembled string with spaces added where needed.
 */
export const cjkspace = (
  string: TemplateStringsArray, ...subs: string[]
): string => {
  // Interleave literals and substitutions: s0, sub0, s1, sub1, ...
  // `subs` is one shorter than `string`, so pad it to line the pairs up.
  const parts = _.flatten(_.zip(string, subs.concat("")));
  const filtered = parts.filter(
    (c): c is string => c !== undefined && c !== "",
  );
  return join(
    filtered,
    // `join` hands over (current, previous); look only at the characters
    // that touch at the boundary between the two pieces.
    (current, previous) => (
      shouldSpace(firstCodePoint(current), lastCodePoint(previous)) ? " " : ""
    ),
  );
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment