Skip to content

Instantly share code, notes, and snippets.

@montasim
Created January 29, 2025 14:20
Show Gist options
  • Save montasim/d667238ca1b1ba71790d399863f33f10 to your computer and use it in GitHub Desktop.
Save montasim/d667238ca1b1ba71790d399863f33f10 to your computer and use it in GitHub Desktop.
Language-wise Regular Expressions.
import { z } from "zod"; // Ensure Zod is imported
/**
 * Lookup table from a lowercase language name to the RegExp that validates
 * text written in that language.
 *
 * Every pattern is anchored at both ends and admits only whitespace (`\s`) plus
 * the characters of one or two Unicode ranges (or, for Latin-script languages,
 * the `Script=Latin` property, which needs the `u` flag). Because each pattern
 * ends in `*`, the empty string and whitespace-only strings match as well.
 *
 * Languages written in the same script reuse one RegExp object. None of the
 * patterns uses the `g` or `y` flag, so `.test()` keeps no state between calls.
 */
const arabicScriptOnly = /^[\u0600-\u06FF\s]*$/;
const devanagariOnly = /^[\u0900-\u097F\s]*$/;
const hanIdeographsOnly = /^[\u4E00-\u9FFF\s]*$/;
const cyrillicOnly = /^[\u0400-\u04FF\s]*$/;
const latinScriptOnly = /^[\p{Script=Latin}\s]*$/u;

const languageRegexPatterns = {
  // Unaccented A-Z / a-z only.
  english: /^[A-Za-z\s]*$/,

  // South Asian scripts.
  bangla: /^[\u0980-\u09FF\s]*$/, // Bangla (Bengali)
  hindi: devanagariOnly, // Hindi (Devanagari)

  // Languages written in the Arabic script.
  arabic: arabicScriptOnly,
  urdu: arabicScriptOnly,
  persian: arabicScriptOnly, // Persian (Farsi, Dari)
  pashto: arabicScriptOnly,

  // East Asian scripts.
  chinese: hanIdeographsOnly,
  mandarin: hanIdeographsOnly,
  japanese: /^[\u3040-\u30FF\u4E00-\u9FFF\s]*$/, // Hiragana, Katakana, Kanji
  korean: /^[\uAC00-\uD7A3\s]*$/, // Hangul

  // More South Asian scripts.
  tamil: /^[\u0B80-\u0BFF\s]*$/,
  telugu: /^[\u0C00-\u0C7F\s]*$/,
  marathi: devanagariOnly, // Marathi (Devanagari)
  gujarati: /^[\u0A80-\u0AFF\s]*$/,
  punjabi: /^[\u0A00-\u0A7F\s]*$/, // Punjabi (Gurmukhi)
  sinhala: /^[\u0D80-\u0DFF\s]*$/,

  // Southeast Asian scripts.
  burmese: /^[\u1000-\u109F\s]*$/, // Burmese (Myanmar)
  thai: /^[\u0E00-\u0E7F\s]*$/,
  lao: /^[\u0E80-\u0EFF\s]*$/,
  cambodian: /^[\u1780-\u17FF\s]*$/, // Khmer

  hebrew: /^[\u0590-\u05FF\s]*$/,
  greek: /^[\u0370-\u03FF\s]*$/,

  // Languages written in Cyrillic.
  russian: cyrillicOnly,
  ukrainian: cyrillicOnly,
  bulgarian: cyrillicOnly,
  serbian: cyrillicOnly,

  georgian: /^[\u10A0-\u10FF\s]*$/,
  armenian: /^[\u0530-\u058F\s]*$/,

  // Languages written in the Latin script (accented letters included).
  vietnamese: latinScriptOnly,
  french: latinScriptOnly,
  spanish: latinScriptOnly,
  portuguese: latinScriptOnly,
  german: latinScriptOnly,
  italian: latinScriptOnly,
  dutch: latinScriptOnly,
  polish: latinScriptOnly,
  turkish: latinScriptOnly,
  malay: latinScriptOnly,
  indonesian: latinScriptOnly,
  filipino: latinScriptOnly, // Filipino (Tagalog)
};
/**
 * Builds the base Zod string schema used for required text fields.
 *
 * @param {string} fieldName - Field name interpolated into the error message.
 * @returns {z.ZodString} - A string schema that rejects the empty string with
 *   the message "<fieldName> is required".
 */
const nonEmptyString = (fieldName) => {
  const requiredMessage = `${fieldName} is required`;
  return z.string().nonempty(requiredMessage);
};
/**
 * Creates a Zod schema that accepts only non-empty text written in one language.
 *
 * The schema is `nonEmptyString(fieldName)` refined with the RegExp registered
 * for `languageKey` in `languageRegexPatterns`. The patterns in this file all
 * admit whitespace, so a whitespace-only value passes both checks.
 *
 * @param {string} fieldName - The name of the field, used in error messages.
 * @param {string} languageKey - A key of `languageRegexPatterns`
 *   (e.g., "english", "bangla", "arabic").
 * @returns {z.ZodEffects<z.ZodString>} - The refined schema. NOTE(review): Zod 3
 *   wraps `.refine()` results in `ZodEffects`, so string-only builder methods
 *   are not available on the result; confirm against the installed Zod version.
 * @throws {Error} - If `languageKey` is not an own key of `languageRegexPatterns`
 *   (message: "Invalid language key: <languageKey>").
 *
 * @example
 * const banglaSchema = validLanguageString("Name", "bangla");
 * banglaSchema.safeParse("বাংলা নাম"); // ✅ Valid
 * banglaSchema.safeParse("English Name"); // ❌ Invalid
 */
const validLanguageString = (fieldName, languageKey) => {
  // Own-property check: a plain `table[key]` lookup also finds inherited members
  // such as "constructor" or "toString". Those are truthy but are not RegExp
  // objects, so they would slip past the guard and fail later inside `.test()`.
  const isRegisteredLanguage = Object.prototype.hasOwnProperty.call(
    languageRegexPatterns,
    languageKey
  );
  const languageRegex = isRegisteredLanguage
    ? languageRegexPatterns[languageKey]
    : undefined;

  if (!languageRegex) {
    throw new Error(`Invalid language key: ${languageKey}`);
  }

  return nonEmptyString(fieldName).refine(
    (value) => languageRegex.test(value),
    {
      message: `${fieldName} must contain only ${languageKey} characters.`,
    }
  );
};
// Example usage: one schema per language under test.
const banglaSchema = validLanguageString("Name", "bangla");
const englishSchema = validLanguageString("Name", "english");
const arabicSchema = validLanguageString("Name", "arabic");

// Smoke checks: each sample is parsed with the paired schema and the raw
// safeParse result is logged, in the order listed.
const sampleChecks = [
  [banglaSchema, "বাংলা নাম"], // ✅ Valid
  [englishSchema, "English Name"], // ✅ Valid
  [arabicSchema, "عربي النص"], // ✅ Valid
  [englishSchema, "বাংলা নাম"], // ❌ Invalid
  [banglaSchema, "English Name"], // ❌ Invalid
];
for (const [schema, sample] of sampleChecks) {
  console.log(schema.safeParse(sample));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment