Created
October 7, 2016 00:05
-
-
Save j5ik2o/6496540061051d3eecbe1842f0855f1a to your computer and use it in GitHub Desktop.
サロゲート文字を含むユニコード文字を生成するジェネレータの例
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.scalacheck.Gen | |
/**
 * Mix-in providing ScalaCheck generators for Unicode strings that may contain
 * surrogate pairs (characters outside the Basic Multilingual Plane).
 *
 * The only public member is [[genUnicodeString]]; everything else is a private
 * building block.
 */
trait UnicodeGenSupport {

  // High-surrogate code units: the first half of a UTF-16 surrogate pair.
  private val unicodeHighSurrogatePlane: Vector[Char] =
    (Character.MIN_HIGH_SURROGATE to Character.MAX_HIGH_SURROGATE).toVector

  // Low-surrogate code units: the second half of a UTF-16 surrogate pair.
  private val unicodeLowSurrogatePlane: Vector[Char] =
    (Character.MIN_LOW_SURROGATE to Character.MAX_LOW_SURROGATE).toVector

  // BMP (Basic Multilingual Plane) characters: every defined Char that is not a
  // surrogate code unit. The surrogate check is required because
  // Character.isDefined alone does not exclude surrogates.
  private val unicodeBasicMultilingualPlane: Vector[Char] =
    (Character.MIN_VALUE to Character.MAX_VALUE)
      .filter(c => !Character.isSurrogate(c) && Character.isDefined(c))
      .toVector

  // Generator for a single BMP character (one UTF-16 code unit).
  private val genUnicodeCharacterBasicMultilingualPlane: Gen[Seq[Char]] =
    Gen.oneOf(unicodeBasicMultilingualPlane).map(Seq(_))

  // Generator for a single supplementary character (two UTF-16 code units:
  // a high surrogate followed by a low surrogate).
  // NOTE: unlike the BMP generator, the resulting code point is not filtered
  // with Character.isDefined, so unassigned code points can be produced.
  private val genUnicodeCharacterSupplementaryPlane: Gen[Seq[Char]] =
    for {
      high <- Gen.oneOf(unicodeHighSurrogatePlane)
      low  <- Gen.oneOf(unicodeLowSurrogatePlane)
    } yield Seq(high, low)

  // Weighted choice between the two generators: BMP characters are picked with
  // weight 9, surrogate pairs with weight 1.
  private val genUnicodeCharacter: Gen[Seq[Char]] =
    Gen.frequency(
      9 -> genUnicodeCharacterBasicMultilingualPlane,
      1 -> genUnicodeCharacterSupplementaryPlane
    )

  /**
   * Generates a Unicode string that may contain surrogate pairs.
   *
   * @param n number of characters (code points) to generate; because a
   *          supplementary character occupies two `Char`s, the resulting
   *          `String.length` can be larger than `n` (up to `2 * n`)
   * @return a generator of strings made of `n` generated characters
   */
  def genUnicodeString(n: Int): Gen[String] =
    Gen.listOfN(n, genUnicodeCharacter).map(_.flatten.mkString)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment