Last active
August 28, 2017 14:05
-
-
Save maxt3r/0d278472b9a40814888ad847caee2429 to your computer and use it in GitHub Desktop.
Full-text query generator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Helpers for turning free-form user search text into a full-text search
/// condition. Stop words are read once from <c>sys.fulltext_system_stopwords</c>
/// and removed from generated queries.
/// </summary>
public class FullTextUtils
{
    /// <summary>Search condition returned when the input yields no usable terms.</summary>
    private const string EmptyQuery = "\"\"";

    // Matches every character that is neither a word character nor whitespace.
    // Whitespace is deliberately preserved so that the words of a quoted phrase
    // are not glued together when punctuation is stripped.
    private static readonly Regex NonWordNonSpace = new Regex(@"[^\w\s]", RegexOptions.Compiled);

    // Matches a run of one or more whitespace characters (collapsed to a single space).
    private static readonly Regex WhitespaceRun = new Regex(@"\s+", RegexOptions.Compiled);

    /// <summary>Case-insensitive set of words that are dropped from generated queries.</summary>
    static HashSet<string> StopWords { get; set; }

    /// <summary>
    /// Loads the stop-word list once. Any failure (for example, the database being
    /// unreachable) falls back to an empty list so that the type initializer never
    /// throws; queries are then generated without stop-word filtering.
    /// </summary>
    static FullTextUtils()
    {
        try
        {
            using (var cn = DBUtils.GetNewOpenConnection())
            {
                StopWords = new HashSet<string>(
                    cn.Query<string>("select distinct stopword from sys.fulltext_system_stopwords"),
                    StringComparer.CurrentCultureIgnoreCase);
            }
        }
        catch
        {
            // Best effort: an exception escaping a static constructor would make the
            // whole type unusable, so degrade to "no stop words" instead.
            StopWords = new HashSet<string>(StringComparer.CurrentCultureIgnoreCase);
        }
    }

    /// <summary>
    /// Converts user-entered search text into a full-text search condition.
    /// </summary>
    /// <param name="query">Raw search text; may be null or empty.</param>
    /// <returns>
    /// <list type="bullet">
    /// <item><c>""</c> (two quote characters) when there is nothing to search for.</item>
    /// <item>The trimmed input unchanged when it is exactly one quoted phrase.</item>
    /// <item>Otherwise each remaining term wrapped in double quotes and joined
    /// with <c> NEAR </c>, e.g. <c>"foo" NEAR "bar baz"</c>.</item>
    /// </list>
    /// </returns>
    public static string ToFtsQuery(string query)
    {
        if (string.IsNullOrEmpty(query))
        {
            return EmptyQuery;
        }

        query = query.Trim();

        // A query that is one single quoted phrase is passed through as an exact term.
        if (IsSingleQuotedPhrase(query))
        {
            return query;
        }

        // Split on spaces (keeping quoted phrases together), strip punctuation,
        // drop blanks and stop words, then quote every term so that words such as
        // "near"/"and"/"or" and multi-word phrases are treated as literal terms.
        var terms = Split(query, ' ', '"')
            .Select(t => CleanTerm(t))
            .Where(t => t.Length > 0 && !StopWords.Contains(t))
            .Select(t => "\"" + t + "\"")
            .ToArray();

        if (terms.Length == 0)
        {
            return EmptyQuery;
        }

        return string.Join(" NEAR ", terms);
    }

    /// <summary>
    /// Returns true when <paramref name="text"/> starts with a double quote, ends
    /// with a double quote, and contains no other double quote in between.
    /// </summary>
    private static bool IsSingleQuotedPhrase(string text)
    {
        return text.Length >= 2
            && text[0] == '"'
            && text.IndexOf('"', 1) == text.Length - 1;
    }

    /// <summary>
    /// Removes every character that is neither a word character nor whitespace,
    /// collapses whitespace runs to a single space and trims the result.
    /// </summary>
    private static string CleanTerm(string term)
    {
        var stripped = NonWordNonSpace.Replace(term, "");
        return WhitespaceRun.Replace(stripped, " ").Trim();
    }

    /// <summary>
    /// Splits <paramref name="myString"/> on <paramref name="separator"/>, except
    /// inside sections delimited by <paramref name="escapeCharacter"/>, which are
    /// returned whole (without the delimiters).
    /// </summary>
    /// <param name="myString">Text to split; must not be null.</param>
    /// <param name="separator">Character that separates terms outside delimited sections.</param>
    /// <param name="escapeCharacter">Character that opens and closes a delimited section.</param>
    /// <returns>The terms in their original order; delimited sections may be empty strings.</returns>
    private static List<String> Split(string myString, char separator, char escapeCharacter)
    {
        int escapeCount = myString.Count(c => c == escapeCharacter);
        if (escapeCount % 2 != 0)
        {
            // Unbalanced delimiters: drop the last one so the remainder pairs up.
            myString = myString.Remove(myString.LastIndexOf(escapeCharacter), 1);
        }

        // After splitting on the delimiter, even-indexed segments lie outside
        // delimiters (split further on the separator) and odd-indexed segments
        // lie inside them (kept as a single term).
        return myString.Split(escapeCharacter)
            .SelectMany((segment, index) => index % 2 == 0
                ? segment.Split(new[] { separator }, StringSplitOptions.RemoveEmptyEntries)
                : new[] { segment })
            .ToList();
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment