Last active
September 25, 2024 08:04
-
-
Save adacola/7f74906816cc46b3d76782be7e50c363 to your computer and use it in GitHub Desktop.
System.Text.Json でシリアライズする際に全角空白などをUnicode表記にせずにそのまま出力するエンコーダーをF#で実装
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Encoder for System.Text.Json that writes characters such as full-width spaces as-is instead of \uXXXX escapes when serializing.
// Ported from / reference: https://gist.github.com/ufcpp/08bc36a4619855bed5b8702107e887ea
module Adacola.JavaScriptEncoder | |
#r "nuget: FSharp.Span.Utils" | |
#r "nuget: FSharp.SystemTextJson" | |
open System | |
open System.Text | |
open System.Text.Json | |
open System.Text.Json.Serialization | |
open System.Text.Encodings.Web | |
open System.Collections.Generic | |
open FSharp.NativeInterop | |
open FSharp.Span.Utils | |
#nowarn "9" | |
/// A JavaScriptEncoder that leaves ordinary BMP text (full-width spaces, kana, etc.) untouched.
/// Only the following are escaped:
///   - backslash, double quote, CR, LF and TAB, as two-character escapes (\\ \" \r \n \t);
///   - other control characters, as \uXXXX;
///   - supplementary-plane scalars (above U+FFFF), as a \uXXXX\uXXXX surrogate pair.
type NoEscapingEncoder() =
    inherit JavaScriptEncoder()

    /// Returns the character that follows the backslash in the two-character escape for `c`,
    /// or ValueNone when `c` has no short escape. Single source of truth for both
    /// WillEncode and TryEncodeUnicodeScalar.
    let shortEscapeOf (c: char) =
        match c with
        | '\\' -> ValueSome '\\'
        | '\"' -> ValueSome '\"'
        | '\r' -> ValueSome 'r'
        | '\n' -> ValueSome 'n'
        | '\t' -> ValueSome 't'
        | _ -> ValueNone

    /// Writes the six characters `\uXXXX` (upper-case hex) for one UTF-16 code unit at `dest`.
    /// The caller must have verified that at least six characters are available.
    let writeHexEscape (codeUnit: char) (dest: nativeptr<char>) =
        NativePtr.set dest 0 '\\'
        NativePtr.set dest 1 'u'
        let mutable written = 0
        // "X4" always yields exactly four digits for a 16-bit value, so the result can be ignored.
        (uint16 codeUnit).TryFormat(System.Span<char>(NativePtr.toVoidPtr (NativePtr.add dest 2), 4), &written, "X4".AsSpan()) |> ignore

    /// Worst case is a supplementary scalar written as two `\uXXXX` escapes (12 characters).
    override _.MaxOutputCharactersPerInputCharacter with get() = 12

    /// Returns the index of the first UTF-16 code unit in `text` that must be encoded, or -1 if none.
    /// Surrogate code units are always reported, because every scalar above U+FFFF is encoded
    /// (see WillEncode) and a lone surrogate is not valid text.
    override this.FindFirstCharacterToEncode (text: nativeptr<char>, textLength: int) =
        let mutable index = 0
        let mutable found = -1
        // Stop at the first hit instead of scanning the rest of the text.
        while found < 0 && index < textLength do
            let c = NativePtr.get text index
            if Char.IsSurrogate c || this.WillEncode(int c) then
                found <- index
            index <- index + 1
        found

    /// Writes the encoded form of `unicodeScalar` to `buffer`.
    /// Returns false (with `numberOfCharactersWritten` = 0 and nothing written) when
    /// `bufferLength` is too small for the complete output; otherwise returns true and
    /// sets `numberOfCharactersWritten` to the number of characters written.
    override _.TryEncodeUnicodeScalar (unicodeScalar: int, buffer: nativeptr<char>, bufferLength: int, numberOfCharactersWritten: byref<int>) =
        let isSupplementary = unicodeScalar > 0xFFFF
        // Only meaningful for BMP scalars: for supplementary scalars the cast truncates,
        // so every use below is guarded by `isSupplementary`.
        let bmpChar = char unicodeScalar
        let shortEscape = if isSupplementary then ValueNone else shortEscapeOf bmpChar
        let needsHexEscape = not isSupplementary && shortEscape.IsNone && Char.IsControl bmpChar
        let required =
            if isSupplementary then 12
            elif shortEscape.IsSome then 2
            elif needsHexEscape then 6
            else 1
        // Check the full size up front so a failure never leaves a partial escape behind.
        if bufferLength < required then
            numberOfCharactersWritten <- 0
            false
        else
            if isSupplementary then
                // Split the scalar into its UTF-16 surrogate pair and escape each half.
                let offset = unicodeScalar - 0x10000
                writeHexEscape (char (0xD800 + (offset >>> 10))) buffer
                writeHexEscape (char (0xDC00 + (offset &&& 0x3FF))) (NativePtr.add buffer 6)
            else
                match shortEscape with
                | ValueSome escaped ->
                    NativePtr.set buffer 0 '\\'
                    NativePtr.set buffer 1 escaped
                | ValueNone ->
                    if needsHexEscape then
                        writeHexEscape bmpChar buffer
                    else
                        // Everything else in the BMP is emitted verbatim.
                        NativePtr.set buffer 0 bmpChar
            numberOfCharactersWritten <- required
            true

    /// True for supplementary-plane scalars, control characters, and the characters
    /// that have a short escape (backslash, double quote, CR, LF, TAB).
    override _.WillEncode (unicodeScalar: int) =
        if unicodeScalar > 0xFFFF then
            true
        else
            let c = char unicodeScalar
            Char.IsControl c || (shortEscapeOf c).IsSome
/// Shared encoder instance, intended to be assigned to JsonSerializerOptions.Encoder.
let NoEscaping: NoEscapingEncoder = NoEscapingEncoder()
/// Manual smoke test for the NoEscaping encoder; prints its results to stdout.
module private Test =
    /// Minimal record used as the round-trip payload.
    type A = { Id: int; Name: string }

    /// Serializes a sample record with the NoEscaping encoder, deserializes it again,
    /// then prints the JSON text, the deserialized record, and each UTF-16 code unit
    /// of the deserialized Name together with its hexadecimal value.
    let test() =
        let sample = { Id = 123; Name = "あいう abc \b/\r\n\t\\\'🐈️" }

        // Configure the options through property setters before first use.
        let jsonOptions = JsonSerializerOptions()
        jsonOptions.Encoder <- NoEscaping
        jsonOptions.WriteIndented <- true
        jsonOptions.DefaultIgnoreCondition <- JsonIgnoreCondition.WhenWritingNull

        let serialized = JsonSerializer.Serialize(sample, jsonOptions)
        let deserialized = JsonSerializer.Deserialize<A>(serialized, jsonOptions)
        stdout.WriteLine $"%A{serialized}"
        stdout.WriteLine $"%A{deserialized}"

        deserialized.Name
        |> Seq.iter (fun c -> stdout.WriteLine $"%A{c}: U+{int c:X2}")
// Test.test() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment