Created
April 19, 2013 14:34
-
-
Save grumpydev/5420750 to your computer and use it in GitHub Desktop.
Very simple pluralizer with the potential to handle multiple languages.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
namespace ConsoleApplication2 | |
{ | |
using System; | |
using System.Collections.Generic; | |
using System.Globalization; | |
using System.Text.RegularExpressions; | |
/// <summary>
/// Simplistic pluraliser. Rules are tried in order and the first rule that
/// matches the input produces the result.
/// </summary>
public static class Pluralizer
{
    private static readonly CultureInfo murica = new CultureInfo("en-US");

    /// <summary>
    /// Maps a requested culture to one that can actually be translated.
    /// Each delegate should return <c>null</c> if it does not handle the culture,
    /// or the culture whose rules should be used. Mappers are consulted in list
    /// order and the first non-null answer wins.
    /// </summary>
    public static List<Func<CultureInfo, CultureInfo>> CultureMappers { get; private set; }

    /// <summary>
    /// The pluralization rules for each culture, in the order they are tried.
    /// </summary>
    public static Dictionary<CultureInfo, List<IPluralizationRule>> Rules { get; private set; }

    /// <summary>
    /// Registers the default mapper (every culture maps to en-US) and the
    /// default en-US rule set.
    /// </summary>
    static Pluralizer()
    {
        CultureMappers = new List<Func<CultureInfo, CultureInfo>> { c => murica };

        // Some of the default rules adapted from Rails https://github.com/rails/rails
        // Every pattern is anchored to the end of the word: the rule engine
        // replaces whatever the pattern matches, so an unanchored pattern would
        // rewrite the middle of longer words ("children" -> "childrenren").
        var defaultRules = new List<IPluralizationRule>
        {
            // Uncountable or already-plural words are returned as they came in.
            new RegExPluralizationRule("(people)$", "$1"),
            new RegExPluralizationRule("(sheep)$", "$1"),
            new RegExPluralizationRule("(fish)$", "$1"),
            new RegExPluralizationRule("(children)$", "$1"),

            // Irregular plurals.
            new RegExPluralizationRule("(pe)rson$", "$1ople"),
            new RegExPluralizationRule("(child)$", "$1ren"),
            new RegExPluralizationRule("(quiz)$", "$1zes"),
            new RegExPluralizationRule("^(oxen)$", "$1"),
            new RegExPluralizationRule("^(ox)$", "$1en"),
            new RegExPluralizationRule("(m|l)ice$", "$1ice"),
            new RegExPluralizationRule("(m|l)ouse$", "$1ice"),
            new RegExPluralizationRule("(matr|vert|ind)(?:ix|ex)$", "$1ices"),

            // Suffix based rules.
            new RegExPluralizationRule("(x|ch|ss|sh)$", "$1es"),
            new RegExPluralizationRule("([^aeiouy]|qu)y$", "$1ies"),
            new RegExPluralizationRule("(hive)$", "$1s"),
            new RegExPluralizationRule("(?:([^f])fe|([lr])f)$", "$1$2ves"),
            new RegExPluralizationRule("sis$", "ses"),
            new RegExPluralizationRule("([ti])a$", "$1a"),
            new RegExPluralizationRule("([ti])um$", "$1a"),
            new RegExPluralizationRule("(buffal|tomat)o$", "$1oes"),
            new RegExPluralizationRule("(bu)s$", "$1ses"),
            new RegExPluralizationRule("(alias|status)$", "$1es"),
            new RegExPluralizationRule("(octop|vir)i$", "$1i"),
            new RegExPluralizationRule("(octop|vir)us$", "$1i"),
            new RegExPluralizationRule("(ax|test)is$", "$1es"),

            // Fallbacks: leave a trailing "s" alone, otherwise append one.
            new RegExPluralizationRule("s$", "s"),
            new RegExPluralizationRule("$", "s")
        };

        Rules = new Dictionary<CultureInfo, List<IPluralizationRule>> { { murica, defaultRules } };
    }

    /// <summary>
    /// Attempt to pluralize the string for the given culture
    /// </summary>
    /// <param name="input">
    /// The input string
    /// </param>
    /// <param name="culture">
    /// The culture to attempt to use; it is passed to the registered culture mappers
    /// </param>
    /// <returns>
    /// A possibly pluralised string, or a laughable mess, depending on the word :)
    /// The input is returned unchanged when no rule matches.
    /// </returns>
    /// <exception cref="ArgumentNullException">If <paramref name="input"/> is null</exception>
    /// <exception cref="InvalidOperationException">If no rules can be found for the culture</exception>
    public static string Pluralize(this string input, CultureInfo culture)
    {
        if (input == null)
        {
            throw new ArgumentNullException("input");
        }

        foreach (var rule in GetRules(culture))
        {
            // Use a scratch variable so a non-matching rule can never
            // overwrite what is eventually returned.
            string candidate;
            if (rule.TryMatch(input, out candidate))
            {
                return candidate;
            }
        }

        return input;
    }

    /// <summary>
    /// Attempt to pluralize the string for the default culture (en-US)
    /// </summary>
    /// <param name="input">
    /// The input string
    /// </param>
    /// <returns>
    /// A possibly pluralised string, or a laughable mess, depending on the word :)
    /// </returns>
    public static string Pluralize(this string input)
    {
        return input.Pluralize(murica);
    }

    /// <summary>
    /// Resolves the rule list for a culture by asking each culture mapper in turn.
    /// </summary>
    /// <param name="culture">The requested culture</param>
    /// <returns>The rules registered for the first culture a mapper returns</returns>
    /// <exception cref="InvalidOperationException">
    /// If no mapper handles the culture, or the mapped culture has no rules registered
    /// </exception>
    private static IEnumerable<IPluralizationRule> GetRules(CultureInfo culture)
    {
        foreach (var cultureMapper in CultureMappers)
        {
            var mapped = cultureMapper.Invoke(culture);
            if (mapped == null)
            {
                continue;
            }

            List<IPluralizationRule> mappedRules;
            if (!Rules.TryGetValue(mapped, out mappedRules))
            {
                throw new InvalidOperationException(string.Format("Mapped {0} to {1} but no rules found!", culture, mapped));
            }

            return mappedRules;
        }

        throw new InvalidOperationException(string.Format("Unable to map input culture {0} to a set of pluralization rules", culture));
    }

    /// <summary>
    /// Pluralisation rule
    /// </summary>
    public interface IPluralizationRule
    {
        /// <summary>
        /// Try and match the word
        /// </summary>
        /// <param name="input">Input word</param>
        /// <param name="result">Output to send the pluralized word to</param>
        /// <returns>True if pluralized, false otherwise</returns>
        bool TryMatch(string input, out string result);
    }

    /// <summary>
    /// Pluralization rule that uses a regex matcher
    /// </summary>
    public class RegExPluralizationRule : IPluralizationRule
    {
        private readonly Regex expression;
        private readonly string substitution;

        /// <summary>
        /// Initializes a new instance of the <see cref="RegExPluralizationRule"/> class.
        /// The expression is compiled and matched case-insensitively.
        /// </summary>
        /// <param name="expression">Regular expression string</param>
        /// <param name="substitution">Substitution string (may use $1, $2, ... group references)</param>
        /// <exception cref="ArgumentNullException">If either argument is null</exception>
        public RegExPluralizationRule(string expression, string substitution)
        {
            if (expression == null)
            {
                throw new ArgumentNullException("expression");
            }

            if (substitution == null)
            {
                throw new ArgumentNullException("substitution");
            }

            this.expression = new Regex(expression, RegexOptions.Compiled | RegexOptions.IgnoreCase);
            this.substitution = substitution;
        }

        /// <summary>
        /// Try and match the word
        /// </summary>
        /// <param name="input">Input word</param>
        /// <param name="result">
        /// The input with every match replaced by the expanded substitution;
        /// equal to the input when nothing matched
        /// </param>
        /// <returns>True if pluralized, false otherwise</returns>
        /// <exception cref="ArgumentNullException">If <paramref name="input"/> is null</exception>
        public bool TryMatch(string input, out string result)
        {
            if (input == null)
            {
                throw new ArgumentNullException("input");
            }

            // The evaluator only runs when the pattern matches, which lets a
            // single Replace pass both detect the match and build the output.
            var matched = false;
            result = this.expression.Replace(
                input,
                m =>
                {
                    matched = true;
                    return m.Result(this.substitution);
                });

            return matched;
        }
    }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment