Skip to content

Instantly share code, notes, and snippets.

@grumpydev
Created April 19, 2013 14:34
Show Gist options
  • Save grumpydev/5420750 to your computer and use it in GitHub Desktop.
Save grumpydev/5420750 to your computer and use it in GitHub Desktop.
Very simple pluralizer with the potential to handle multiple languages.
namespace ConsoleApplication2
{
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Text.RegularExpressions;
/// <summary>
/// Simplistic, rule-based pluralizer. A requested culture is first mapped to a culture
/// that has a registered rule set (see <see cref="CultureMappers"/>), then the rules
/// registered for that culture in <see cref="Rules"/> are tried in order and the first
/// rule that matches produces the result.
/// </summary>
public static class Pluralizer
{
    /// <summary>
    /// Culture whose rule set is registered out of the box, and the culture used by
    /// the <see cref="Pluralize(string)"/> overload.
    /// </summary>
    private static readonly CultureInfo DefaultCulture = new CultureInfo("en-US");

    /// <summary>
    /// Gets the ordered list of culture mappers. Each mapper receives the requested culture
    /// and returns the culture whose rules should be used, or <c>null</c> to defer to the
    /// next mapper in the list. The single default entry maps every culture to en-US, so a
    /// custom mapper only takes effect if it is inserted before that entry (or replaces it).
    /// </summary>
    public static List<Func<CultureInfo, CultureInfo>> CultureMappers { get; private set; }

    /// <summary>
    /// Gets the pluralization rules for each culture. The rules of a culture are tried in
    /// list order and the first rule that matches wins, so more specific rules must come
    /// before more general ones.
    /// </summary>
    public static Dictionary<CultureInfo, List<IPluralizationRule>> Rules { get; private set; }

    /// <summary>
    /// Registers the default catch-all culture mapper and the built-in en-US rule set.
    /// </summary>
    static Pluralizer()
    {
        CultureMappers = new List<Func<CultureInfo, CultureInfo>> { c => DefaultCulture };

        // Some of the default rules adapted from Rails https://github.com/rails/rails
        // Order matters: the first matching rule wins, so irregular and already-plural
        // words come first and the generic "append s" rule comes last.
        var defaultRules = new List<IPluralizationRule>
        {
            // Irregular / uncountable words. Anchored at the end of the word so they do not
            // fire inside longer words (e.g. "personal"); the capture keeps the input casing.
            new RegExPluralizationRule("(people)$", "$1"),
            new RegExPluralizationRule("(pe)rson$", "$1ople"),
            new RegExPluralizationRule("(sheep)$", "$1"),
            new RegExPluralizationRule("(fish)$", "$1"),
            new RegExPluralizationRule("(children)$", "$1"),
            new RegExPluralizationRule("(child)$", "$1ren"),

            new RegExPluralizationRule("(quiz)$", "$1zes"),
            new RegExPluralizationRule("^(oxen)$", "$1"),
            new RegExPluralizationRule("^(ox)$", "$1en"),

            // Anchored at both ends (as in Rails) so "blouse" or "slice" are not affected.
            new RegExPluralizationRule("^(m|l)ice$", "$1ice"),
            new RegExPluralizationRule("^(m|l)ouse$", "$1ice"),

            new RegExPluralizationRule("(matr|vert|ind)(?:ix|ex)$", "$1ices"),
            new RegExPluralizationRule("(x|ch|ss|sh)$", "$1es"),
            new RegExPluralizationRule("([^aeiouy]|qu)y$", "$1ies"),
            new RegExPluralizationRule("(hive)$", "$1s"),

            // Handles knife -> knives, wolf -> wolves; deliberately skips "ffe" (giraffe).
            new RegExPluralizationRule("(?:([^f])fe|([lr])f)$", "$1$2ves"),

            new RegExPluralizationRule("sis$", "ses"),
            new RegExPluralizationRule("([ti])a$", "$1a"),
            new RegExPluralizationRule("([ti])um$", "$1a"),
            new RegExPluralizationRule("(buffal|tomat)o$", "$1oes"),
            new RegExPluralizationRule("(bu)s$", "$1ses"),
            new RegExPluralizationRule("(alias|status)$", "$1es"),
            new RegExPluralizationRule("(octop|vir)i$", "$1i"),
            new RegExPluralizationRule("(octop|vir)us$", "$1i"),
            new RegExPluralizationRule("(ax|test)is$", "$1es"),

            // Fallbacks: leave words already ending in "s" alone, otherwise append "s".
            new RegExPluralizationRule("s$", "s"),
            new RegExPluralizationRule("$", "s")
        };

        Rules = new Dictionary<CultureInfo, List<IPluralizationRule>> { { DefaultCulture, defaultRules } };
    }

    /// <summary>
    /// Attempt to pluralize the string for the given culture
    /// </summary>
    /// <param name="input">
    /// The input string
    /// </param>
    /// <param name="culture">
    /// The culture to attempt to use; it is handed to the culture mappers as-is
    /// </param>
    /// <returns>
    /// A possibly pluralised string, or a laughable mess, depending on the word :)
    /// An empty input, or an input that no rule matches, is returned unchanged.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="input"/> is <c>null</c>
    /// </exception>
    /// <exception cref="InvalidOperationException">
    /// The culture could not be mapped to a registered set of rules
    /// </exception>
    public static string Pluralize(this string input, CultureInfo culture)
    {
        if (input == null)
        {
            throw new ArgumentNullException("input");
        }

        // There is no word to pluralize; without this guard the catch-all rule would yield "s".
        if (input.Length == 0)
        {
            return input;
        }

        foreach (var rule in GetRules(culture))
        {
            string pluralized;
            if (rule.TryMatch(input, out pluralized))
            {
                return pluralized;
            }
        }

        // No rule applied (or the rule set is empty): hand the word back untouched.
        return input;
    }

    /// <summary>
    /// Attempt to pluralize the string for the default culture
    /// </summary>
    /// <param name="input">
    /// The input string
    /// </param>
    /// <returns>
    /// A possibly pluralised string, or a laughable mess, depending on the word :)
    /// </returns>
    public static string Pluralize(this string input)
    {
        return input.Pluralize(DefaultCulture);
    }

    /// <summary>
    /// Resolves the rule set for a culture by asking each culture mapper in turn and
    /// using the first non-null culture a mapper returns.
    /// </summary>
    /// <param name="culture">The requested culture</param>
    /// <returns>The rules registered for the mapped culture</returns>
    /// <exception cref="InvalidOperationException">
    /// No mapper produced a culture, or the mapped culture has no rules registered
    /// </exception>
    private static IEnumerable<IPluralizationRule> GetRules(CultureInfo culture)
    {
        foreach (var cultureMapper in CultureMappers)
        {
            var mapped = cultureMapper.Invoke(culture);
            if (mapped == null)
            {
                // This mapper does not handle the culture; try the next one.
                continue;
            }

            List<IPluralizationRule> rules;
            if (!Rules.TryGetValue(mapped, out rules) || rules == null)
            {
                throw new InvalidOperationException(string.Format("Mapped {0} to {1} but no rules found!", culture, mapped));
            }

            return rules;
        }

        throw new InvalidOperationException(string.Format("Unable to map input culture {0} to a set of pluralization rules", culture));
    }

    /// <summary>
    /// Pluralisation rule
    /// </summary>
    public interface IPluralizationRule
    {
        /// <summary>
        /// Try and match the word
        /// </summary>
        /// <param name="input">Input word</param>
        /// <param name="result">Output to send the pluralized word to</param>
        /// <returns>True if pluralized, false otherwise</returns>
        bool TryMatch(string input, out string result);
    }

    /// <summary>
    /// Pluralization rule that uses a regex matcher
    /// </summary>
    public class RegExPluralizationRule : IPluralizationRule
    {
        private readonly Regex expression;
        private readonly string substitution;

        /// <summary>
        /// Initializes a new instance of the <see cref="RegExPluralizationRule"/> class.
        /// The expression is compiled and matched case-insensitively.
        /// </summary>
        /// <param name="expression">Regular expression string</param>
        /// <param name="substitution">Substitution string (may use group references such as $1)</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="expression"/> or <paramref name="substitution"/> is <c>null</c>
        /// </exception>
        public RegExPluralizationRule(string expression, string substitution)
        {
            if (expression == null)
            {
                throw new ArgumentNullException("expression");
            }

            // Fail at construction time instead of on the first successful match.
            if (substitution == null)
            {
                throw new ArgumentNullException("substitution");
            }

            this.expression = new Regex(expression, RegexOptions.Compiled | RegexOptions.IgnoreCase);
            this.substitution = substitution;
        }

        /// <summary>
        /// Try and match the word. Every match of the expression in the input is replaced
        /// with the expanded substitution string.
        /// </summary>
        /// <param name="input">Input word</param>
        /// <param name="result">
        /// Output to send the pluralized word to; equals the input when nothing matched
        /// </param>
        /// <returns>True if pluralized, false otherwise</returns>
        public bool TryMatch(string input, out string result)
        {
            var replaced = false;

            // The evaluator only runs when the expression matches, which doubles as the
            // "did anything change" signal without running the regex a second time.
            result = this.expression.Replace(
                input,
                match =>
                {
                    replaced = true;
                    return match.Result(this.substitution);
                });

            return replaced;
        }
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment