Created
March 17, 2016 14:26
-
-
Save okiwan/327aa971108f4b748d29 to your computer and use it in GitHub Desktop.
Conversión de cadena binaria representando texto en Unicode en cadena binaria representando el texto en UTF-8
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Private Function UTF8Encode(b() As Byte) As Byte() | |
| ' Function to convert a Unicode Byte array into a byte array that can be written to create a UTF8 Encoded file. | |
| ' Note the function supports the one, two and three byte UTF8 forms. | |
| ' Note: the MS VBA documentation is confusing. It says the String types only supports single byte charset | |
| ' however, thankfully, it does in fact contain 2 byte Unicode values. | |
| ' Wrote this routine as last resort, tried many ways to get unicode chars to a file or to a shell script call | |
| ' but this was the only way could get to work. | |
| ' RT Perkin | |
| ' 30/10/2015 | |
| Dim b1, b2, b3 As Byte ' UTF8 encoded bytes | |
| Dim u1, u2 As Byte ' Unicode input bytes | |
| Dim out As New Collection ' Collection to build output array | |
| Dim i, j As Integer | |
| Dim unicode As Long | |
| If UBound(b) <= 0 Then | |
| Exit Function | |
| End If | |
| For i = 0 To UBound(b) Step 2 | |
| u1 = b(i) | |
| u2 = b(i + 1) | |
| unicode = u2 * 256 + u1 | |
| If unicode < &H80 Then | |
| ' Boils down to ASCII, one byte UTF-8 | |
| out.Add (u1) | |
| ElseIf unicode < &H800 Then | |
| ' Two byte UTF-8 | |
| ' Code path not tested | |
| b1 = &H80 Or (&H3F And u1) | |
| b2 = &HC0 Or (Int(u1 / 64)) Or ((&H7 And u2) * 4) | |
| out.Add (b2) ' Add most significant byte first | |
| out.Add (b1) | |
| ElseIf unicode < &H10000 Then | |
| ' Three byte UTF-8 | |
| ' Thai chars are in this range | |
| b1 = &H80 Or (&H3F And u1) | |
| b2 = &H80 Or (Int(u1 / 64)) Or ((&HF And u2) * 4) | |
| b3 = &HE0 Or (Int(u2 / 16)) | |
| out.Add (b3) ' Add most significant byte first | |
| out.Add (b2) | |
| out.Add (b1) | |
| Else | |
| ' This case wont arise as VBA strings are 2 byte. Which makes some Unicode codepoints uncodeable. | |
| End If | |
| Next | |
| Dim outBytes() As Byte | |
| ReDim outBytes(1 To out.Count) | |
| For j = 1 To out.Count | |
| outBytes(j) = CByte(out.Item(j)) | |
| Next | |
| UTF8Encode = outBytes | |
| End Function |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment