Skip to content

Instantly share code, notes, and snippets.

@anderson-pids
Last active February 5, 2021 02:45
Show Gist options
  • Save anderson-pids/f2e63ef796dbeac73ea8c5173144dfd9 to your computer and use it in GitHub Desktop.
Save anderson-pids/f2e63ef796dbeac73ea8c5173144dfd9 to your computer and use it in GitHub Desktop.
normalizer
#include "normalizer.h"
// Default constructor. Its only work is to call InitHash(), which fills the
// `hash` lookup table used by RemoveAccentuation().
Normalizer::Normalizer()
{
    this->InitHash();
}
// Destructor. No explicit cleanup is performed here; members are destroyed by
// their own destructors.
Normalizer::~Normalizer() = default;
// Populates `hash` with the accent-stripping table.
//
// Each key is the value a signed 8-bit char holds for the SECOND byte of a
// two-byte UTF-8 sequence whose first byte is 0xC3 (-61 as a signed char);
// that is the byte Convert() hands to RemoveAccentuation(). Each value is the
// plain lowercase ASCII letter to emit instead. Upper- and lowercase accented
// letters both map to the same lowercase letter.
//
// The trailing comment on every row gives the second byte in hex and the
// character that "0xC3 <byte>" encodes.
void Normalizer::InitHash()
{
    struct Entry
    {
        int  code;   // signed value of the second UTF-8 byte
        char plain;  // unaccented replacement letter
    };

    static const Entry kEntries[] = {
        // a
        {  -92, 'a' },  // 0xA4  ä
        {  -93, 'a' },  // 0xA3  ã
        {  -94, 'a' },  // 0xA2  â
        {  -95, 'a' },  // 0xA1  á
        {  -96, 'a' },  // 0xA0  à
        { -124, 'a' },  // 0x84  Ä
        { -125, 'a' },  // 0x83  Ã
        { -126, 'a' },  // 0x82  Â
        { -127, 'a' },  // 0x81  Á
        { -128, 'a' },  // 0x80  À
        // e
        {  -85, 'e' },  // 0xAB  ë
        {  -86, 'e' },  // 0xAA  ê
        {  -87, 'e' },  // 0xA9  é
        {  -88, 'e' },  // 0xA8  è
        { -117, 'e' },  // 0x8B  Ë
        { -118, 'e' },  // 0x8A  Ê
        { -119, 'e' },  // 0x89  É
        { -120, 'e' },  // 0x88  È
        // i
        {  -81, 'i' },  // 0xAF  ï
        {  -82, 'i' },  // 0xAE  î
        {  -83, 'i' },  // 0xAD  í
        {  -84, 'i' },  // 0xAC  ì
        { -113, 'i' },  // 0x8F  Ï
        { -114, 'i' },  // 0x8E  Î
        { -115, 'i' },  // 0x8D  Í
        { -116, 'i' },  // 0x8C  Ì
        // o
        {  -74, 'o' },  // 0xB6  ö
        {  -75, 'o' },  // 0xB5  õ
        {  -76, 'o' },  // 0xB4  ô
        {  -77, 'o' },  // 0xB3  ó
        {  -78, 'o' },  // 0xB2  ò
        { -106, 'o' },  // 0x96  Ö
        { -107, 'o' },  // 0x95  Õ
        { -108, 'o' },  // 0x94  Ô
        { -109, 'o' },  // 0x93  Ó
        { -110, 'o' },  // 0x92  Ò
        // u
        {  -68, 'u' },  // 0xBC  ü
        {  -69, 'u' },  // 0xBB  û
        {  -70, 'u' },  // 0xBA  ú
        {  -71, 'u' },  // 0xB9  ù
        { -100, 'u' },  // 0x9C  Ü
        { -101, 'u' },  // 0x9B  Û
        { -102, 'u' },  // 0x9A  Ú
        { -103, 'u' },  // 0x99  Ù
        // c
        {  -89, 'c' },  // 0xA7  ç
        { -121, 'c' },  // 0x87  Ç
        // n
        {  -79, 'n' },  // 0xB1  ñ
        { -111, 'n' },  // 0x91  Ñ
    };

    // Insert in table order; an existing key is simply overwritten.
    for (const Entry& entry : kEntries)
    {
        hash[entry.code] = entry.plain;
    }
}
string Normalizer::Convert(string term)
{
string::iterator it;
string result="";
int number;
char c;
for(it = term.begin(); it != term.end(); ++it)
{
// number = (*it); //ASCII
// cout << (*it) << ": " << number << endl;
c = *it;
if(*it < 0)
if(*it == -61)
{
*it++;
c = RemoveAccentuation(*it);
}
result += c;
}
return result;
}
// Translates one byte through the accent table held in `hash`.
//
// `ch` is expected to be the second byte of a two-byte UTF-8 sequence whose
// lead byte is 0xC3. If the table has an entry for it, the mapped plain letter
// is returned; otherwise `ch` is returned unchanged. ASCII bytes (0x00-0x7F)
// are always returned unchanged without a lookup.
//
// Notes:
//  - The table is keyed by the negative value a signed 8-bit char would hold
//    for the byte. That key is rebuilt here from the unsigned byte value, so
//    the lookup gives the same answer whether plain `char` is signed or not.
//  - The byte 0xC3 itself has no table entry and is returned unchanged (it is
//    no longer incremented to 0xC4 before the lookup).
//
// @param ch  Byte to translate.
// @return    The unaccented replacement letter, or `ch` if there is none.
char Normalizer::RemoveAccentuation(char ch)
{
    const int byteValue = static_cast<unsigned char>(ch);

    // Plain ASCII (including NUL) never has an entry in the table.
    if (byteValue < 0x80)
    {
        return ch;
    }

    // 0x80..0xFF -> -128..-1, matching the keys inserted by InitHash().
    const int key = byteValue - 256;

    const auto entry = hash.find(key);
    return entry != hash.end() ? entry->second : ch;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment