Skip to content

Instantly share code, notes, and snippets.

@adacola
Last active September 25, 2024 08:04
Show Gist options
  • Save adacola/7f74906816cc46b3d76782be7e50c363 to your computer and use it in GitHub Desktop.
Save adacola/7f74906816cc46b3d76782be7e50c363 to your computer and use it in GitHub Desktop.
System.Text.Json でシリアライズする際に全角空白などをUnicode表記にせずにそのまま出力するエンコーダーをF#で実装
// System.Text.Json でシリアライズする際に全角空白などをUnicode表記にせずそのまま出力するエンコーダー
// 移植元・参考 : https://gist.github.com/ufcpp/08bc36a4619855bed5b8702107e887ea
module Adacola.JavaScriptEncoder
#r "nuget: FSharp.Span.Utils"
#r "nuget: FSharp.SystemTextJson"
open System
open System.Text
open System.Text.Json
open System.Text.Json.Serialization
open System.Text.Encodings.Web
open System.Collections.Generic
open FSharp.NativeInterop
open FSharp.Span.Utils
#nowarn "9"
/// JavaScript encoder for System.Text.Json that leaves ordinary BMP characters
/// untouched instead of rewriting them as \uXXXX escapes.
///
/// What still gets escaped:
///   * backslash, double quote, CR, LF and TAB -> two-character backslash escapes
///   * any other character for which Char.IsControl is true -> one \uXXXX escape
///   * code points above U+FFFF -> two \uXXXX escapes (one per surrogate)
type NoEscapingEncoder() =
    inherit JavaScriptEncoder()

    /// BMP characters that are written as a two-character backslash escape.
    let escapingBmpChar = seq { '\\'; '\"'; '\r'; '\n'; '\t' } |> HashSet

    /// Worst case is a supplementary code point: two \uXXXX escapes, 6 chars each.
    override _.MaxOutputCharactersPerInputCharacter with get() = 12

    /// Returns the index of the first UTF-16 code unit in `text` for which
    /// WillEncode is true, or -1 when there is none.
    ///
    /// NOTE(review): every code unit is tested on its own, so the two halves of a
    /// surrogate pair are passed to WillEncode as separate values below 0x10000
    /// and are not matched by the `> 0xFFFF` rule. Confirm this is intended.
    override this.FindFirstCharacterToEncode (text: nativeptr<char>, textLength: int) =
        let span = Span<char>(NativePtr.toVoidPtr text, textLength)
        let mutable index = 0
        // Stop at the first hit instead of scanning the remainder of the text.
        while index < textLength && not (this.WillEncode(int (span[index]))) do
            index <- index + 1
        if index < textLength then index else -1

    /// Writes the escaped form of `unicodeScalar` to `buffer`.
    ///
    /// Returns true and sets `numberOfCharactersWritten` to the number of chars
    /// produced (1, 2, 6 or 12). Returns false with `numberOfCharactersWritten`
    /// set to 0, and without touching `buffer`, when `bufferLength` is too small.
    override _.TryEncodeUnicodeScalar (unicodeScalar: int, buffer: nativeptr<char>, bufferLength: int, numberOfCharactersWritten: byref<int>) =
        // Writes the six characters "\uXXXX" (upper-case hex) for one UTF-16 code unit.
        let writeHexEscape (codeUnit: char) (target: nativeptr<char>) =
            NativePtr.set target 0 '\\'
            NativePtr.set target 1 'u'
            let digits = Span<char>(NativePtr.toVoidPtr (NativePtr.add target 2), 4)
            let mutable charsWritten = 0
            (uint16 codeUnit).TryFormat(digits, &charsWritten, "X4".AsSpan()) |> ignore

        if unicodeScalar > 0xFFFF then
            // Supplementary code points are handled before any conversion to char:
            // truncating to 16 bits would make e.g. U+10022 look like '"'.
            if bufferLength < 12 then
                // Check the full size up front so nothing is written on failure.
                numberOfCharactersWritten <- 0
                false
            else
                let surrogates = Span<char>(NativePtr.toVoidPtr (NativePtr.stackalloc<char> 2), 2)
                Rune(unicodeScalar).EncodeToUtf16(surrogates) |> ignore
                let high = surrogates[0]
                let low = surrogates[1]
                writeHexEscape high buffer
                writeHexEscape low (NativePtr.add buffer 6)
                numberOfCharactersWritten <- 12
                true
        else
            let c = char unicodeScalar
            let shortEscape =
                match c with
                | '\\' -> ValueSome '\\'
                | '\r' -> ValueSome 'r'
                | '\n' -> ValueSome 'n'
                | '\t' -> ValueSome 't'
                | '\"' -> ValueSome '\"'
                | _ -> ValueNone
            match shortEscape with
            | ValueSome escaped ->
                if bufferLength < 2 then
                    numberOfCharactersWritten <- 0
                    false
                else
                    NativePtr.set buffer 0 '\\'
                    NativePtr.set buffer 1 escaped
                    numberOfCharactersWritten <- 2
                    true
            | ValueNone ->
                if Char.IsControl c then
                    if bufferLength < 6 then
                        numberOfCharactersWritten <- 0
                        false
                    else
                        writeHexEscape c buffer
                        numberOfCharactersWritten <- 6
                        true
                elif bufferLength < 1 then
                    numberOfCharactersWritten <- 0
                    false
                else
                    // Everything else in the BMP is copied through unchanged.
                    NativePtr.set buffer 0 c
                    numberOfCharactersWritten <- 1
                    true

    /// True for code points above U+FFFF, for characters where Char.IsControl is
    /// true, and for the characters in `escapingBmpChar`.
    override _.WillEncode (unicodeScalar: int) =
        if unicodeScalar > 0xFFFF then
            true
        else
            let c = char unicodeScalar
            Char.IsControl c || escapingBmpChar.Contains c
/// Ready-made NoEscapingEncoder instance, e.g. for `JsonSerializerOptions.Encoder`.
let NoEscaping = new NoEscapingEncoder()
/// Manual round-trip check for the NoEscaping encoder; prints to stdout.
module private Test =
    /// Sample record used for the round trip.
    type A = { Id: int; Name: string }

    /// Serializes a sample record with NoEscaping, deserializes it again, and
    /// prints the JSON, the deserialized record, and each char of its Name
    /// together with its hexadecimal code.
    let test() =
        let jsonOptions =
            JsonSerializerOptions(
                Encoder = NoEscaping,
                WriteIndented = true,
                DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull)
        let sample = { Id = 123; Name = "あいう abc \b/\r\n\t\\\'🐈️" }
        let json = JsonSerializer.Serialize(sample, jsonOptions)
        let roundTripped = JsonSerializer.Deserialize<A>(json, jsonOptions)
        stdout.WriteLine $"%A{json}"
        stdout.WriteLine $"%A{roundTripped}"
        roundTripped.Name
        |> Seq.iter (fun ch -> stdout.WriteLine $"%A{ch}: U+{int ch:X2}")
// Test.test()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment