Created
January 29, 2025 14:20
-
-
Save montasim/d667238ca1b1ba71790d399863f33f10 to your computer and use it in GitHub Desktop.
Language-wise Regular Expressions.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { z } from "zod"; // Ensure Zod is imported | |
/**
 * Whole-string regular expressions, keyed by lowercase language name.
 *
 * Each pattern matches a string that consists only of characters from the
 * listed Unicode range(s) or script, plus whitespace (`\s`). Every pattern
 * uses `*`, so the empty string and whitespace-only strings match as well.
 *
 * Languages written in the same script share one RegExp object. None of the
 * patterns carries the `g` or `y` flag, so `.test()` keeps no state between
 * calls and sharing is safe.
 */
const languageRegexPatterns = (() => {
  // Arabic block only.
  const arabicScript = /^[\u0600-\u06FF\s]*$/;
  // Arabic block plus U+200C (zero-width non-joiner), which appears inside
  // correctly spelled Persian/Urdu/Pashto words and is not matched by `\s`.
  const arabicScriptWithZwnj = /^[\u0600-\u06FF\u200C\s]*$/;
  const devanagariScript = /^[\u0900-\u097F\s]*$/;
  const hanScript = /^[\u4E00-\u9FFF\s]*$/;
  const cyrillicScript = /^[\u0400-\u04FF\s]*$/;
  // Latin letters plus the Combining Diacritical Marks block (U+0300-U+036F),
  // so decomposed (NFD) text such as "e" + U+0301 passes just like the
  // precomposed "é" does.
  const latinScript = /^[\p{Script=Latin}\u0300-\u036F\s]*$/u;

  return {
    english: /^[A-Za-z\s]*$/, // English (unaccented ASCII letters only)
    bangla: /^[\u0980-\u09FF\s]*$/, // Bangla (Bengali)
    hindi: devanagariScript, // Hindi (Devanagari)
    arabic: arabicScript, // Arabic
    urdu: arabicScriptWithZwnj, // Urdu (Arabic script)
    persian: arabicScriptWithZwnj, // Persian (Farsi, Dari)
    pashto: arabicScriptWithZwnj, // Pashto (Arabic script)
    chinese: hanScript, // Chinese (Simplified & Traditional)
    mandarin: hanScript, // Mandarin (Chinese characters)
    japanese: /^[\u3040-\u30FF\u4E00-\u9FFF\s]*$/, // Japanese (Hiragana, Katakana, Kanji)
    korean: /^[\uAC00-\uD7A3\s]*$/, // Korean (Hangul syllables)
    tamil: /^[\u0B80-\u0BFF\s]*$/, // Tamil
    telugu: /^[\u0C00-\u0C7F\s]*$/, // Telugu
    marathi: devanagariScript, // Marathi (Devanagari)
    gujarati: /^[\u0A80-\u0AFF\s]*$/, // Gujarati
    punjabi: /^[\u0A00-\u0A7F\s]*$/, // Punjabi (Gurmukhi)
    sinhala: /^[\u0D80-\u0DFF\s]*$/, // Sinhala (Sri Lanka)
    burmese: /^[\u1000-\u109F\s]*$/, // Burmese (Myanmar)
    thai: /^[\u0E00-\u0E7F\s]*$/, // Thai
    lao: /^[\u0E80-\u0EFF\s]*$/, // Lao
    cambodian: /^[\u1780-\u17FF\s]*$/, // Khmer (Cambodian)
    hebrew: /^[\u0590-\u05FF\s]*$/, // Hebrew
    greek: /^[\u0370-\u03FF\s]*$/, // Greek
    russian: cyrillicScript, // Russian (Cyrillic)
    ukrainian: cyrillicScript, // Ukrainian (Cyrillic)
    bulgarian: cyrillicScript, // Bulgarian (Cyrillic)
    serbian: cyrillicScript, // Serbian (Cyrillic)
    georgian: /^[\u10A0-\u10FF\s]*$/, // Georgian
    armenian: /^[\u0530-\u058F\s]*$/, // Armenian
    vietnamese: latinScript, // Vietnamese (Latin with diacritics)
    french: latinScript, // French
    spanish: latinScript, // Spanish
    portuguese: latinScript, // Portuguese
    german: latinScript, // German
    italian: latinScript, // Italian
    dutch: latinScript, // Dutch
    polish: latinScript, // Polish
    turkish: latinScript, // Turkish
    malay: latinScript, // Malay
    indonesian: latinScript, // Indonesian
    filipino: latinScript, // Filipino (Tagalog)
  };
})();
/**
 * Creates a Zod schema for a required (non-empty) string field.
 *
 * Only the empty string is rejected; the value is not trimmed, so a
 * whitespace-only string still passes this check.
 *
 * @param {string} fieldName - Field name interpolated into the error message.
 * @returns {z.ZodString} A string schema with a minimum length of 1 whose
 *   failure message is `"<fieldName> is required"`.
 */
const nonEmptyString = (fieldName) => {
  const requiredMessage = `${fieldName} is required`;
  // min(1, message) is the same check .nonempty(message) performs.
  return z.string().min(1, requiredMessage);
};
/**
 * Creates a Zod schema that accepts a non-empty string written entirely in
 * the script of one language.
 *
 * The language key is resolved once, when the schema is built, so an unknown
 * key fails immediately instead of at parse time.
 *
 * @param {string} fieldName - Field name used in the error messages.
 * @param {string} languageKey - A key of `languageRegexPatterns`
 *   (e.g., "english", "bangla", "arabic").
 * @returns {z.ZodTypeAny} The schema produced by calling `.refine()` on the
 *   non-empty string schema; its concrete class depends on the Zod version.
 * @throws {Error} If `languageKey` is not an own key of
 *   `languageRegexPatterns`.
 *
 * @example
 * const banglaSchema = validLanguageString("Name", "bangla");
 * banglaSchema.safeParse("বাংলা নাম"); // ✅ Valid
 * banglaSchema.safeParse("English Name"); // ❌ Invalid
 */
const validLanguageString = (fieldName, languageKey) => {
  // Look the key up as an OWN property only. A bare bracket lookup would also
  // resolve inherited members such as "constructor" or "toString", which are
  // truthy but have no usable .test() method.
  const isOwnKey = Object.prototype.hasOwnProperty.call(
    languageRegexPatterns,
    languageKey
  );
  const languageRegex = isOwnKey ? languageRegexPatterns[languageKey] : undefined;

  if (!languageRegex) {
    throw new Error(`Invalid language key: ${languageKey}`);
  }

  return nonEmptyString(fieldName).refine(
    (value) => languageRegex.test(value),
    {
      message: `${fieldName} must contain only ${languageKey} characters.`,
    }
  );
};
// Example usage: build one schema per language for a field called "Name".
const banglaSchema = validLanguageString("Name", "bangla");
const englishSchema = validLanguageString("Name", "english");
const arabicSchema = validLanguageString("Name", "arabic");

// Test cases: each call logs the object returned by safeParse().
console.log(banglaSchema.safeParse("বাংলা নাম")); // ✅ Valid
console.log(englishSchema.safeParse("English Name")); // ✅ Valid
console.log(arabicSchema.safeParse("عربي النص")); // ✅ Valid
console.log(englishSchema.safeParse("বাংলা নাম")); // ❌ Invalid
console.log(banglaSchema.safeParse("English Name")); // ❌ Invalid
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment