Created
September 21, 2025 04:31
-
-
Save light-traveller/9bfc93892c1a59dda87aeff1d39ba630 to your computer and use it in GitHub Desktop.
T-SQL script that removes all non-digit characters from a string and normalizes digits from various Unicode ranges (Arabic-Indic, Persian, Fullwidth, etc.) to ASCII 0–9. Works for NVARCHAR strings of any length using a tally table.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| --------------- | |
| -- Test Data -- | |
| --------------- | |
| declare @data table (v nvarchar(100)); | |
| insert into @data values | |
| ('1234567890'), -- clean data | |
| ('12345-67890'), -- middle non-digit character | |
| ('12345 67890'), | |
| ('12345x67890'), | |
| ('12345ax 67890'), -- middle string | |
| (' 1234567890'), -- leading spaces | |
| ('1234567890 '), -- trailing spaces | |
| (' 1234567890 '), -- leading & trailing spaces | |
| (' '), -- single space string | |
| (''), -- empty string | |
| (null), | |
| (N'۱۲۳۴۵۶۷۸۹۰abc'), -- Persian digits + Latin | |
| (N'١٢٣٤٥٦٧٨٩٠ 漢字'), -- Arabic-Indic digits + Chinese | |
| (N'1234567890漢字'); -- Fullwidth digits + Chinese | |
| ----------------------------------------------------------------------- | |
| -- Removes non-digit characters and normalizes digits to ASCII [0-9] -- | |
| ----------------------------------------------------------------------- | |
| ;with numbers as ( | |
| select number + 1 as n | |
| from master..spt_values | |
| where type = 'P' and number < 1000 -- this number has to be equal or more than max string length | |
| ) | |
| select v, | |
| ( | |
| select | |
| case | |
| when u between 48 and 57 then nchar(u) | |
| when u between 1632 and 1641 then nchar(u - 1632 + 48) | |
| when u between 1776 and 1785 then nchar(u - 1776 + 48) | |
| when u between 65296 and 65305 then nchar(u - 65296 + 48) | |
| else null | |
| end | |
| from ( | |
| select unicode(substring(isnull(v,''), n, 1)) as u, n | |
| from numbers | |
| where n <= len(isnull(v,'')) | |
| ) as chars | |
| where u between 48 and 57 | |
| or u between 1632 and 1641 | |
| or u between 1776 and 1785 | |
| or u between 65296 and 65305 | |
| order by n | |
| for xml path(''), type | |
| ).value('.', 'nvarchar(max)') as cleaned | |
| from @data; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment