Skip to content

Instantly share code, notes, and snippets.

@generall
Last active August 29, 2015 13:57
Show Gist options
  • Save generall/9711803 to your computer and use it in GitHub Desktop.
Save generall/9711803 to your computer and use it in GitHub Desktop.
load from mystem
void CCorpus::loadWithMystem(const std::string& fname)
{
std::system(("./mystem -e UTF-8 -n -l -i -w " + fname + " > " + fname + ".m").c_str());
std::ifstream from_mystem((fname + ".m").c_str());
std::string line;
std::string temp;
while (std::getline(from_mystem, line))
{
auto n = line.find("|");
if (n != std::string::npos)
{
temp = line.substr(0, n);
}
else
{
temp = line;
}
n = temp.find("=");
if (n != std::string::npos)
{
CWord t;
t.str = temp.substr(0, n);
temp = temp.substr(n);
switch (temp[1])
{
case 'S':
t.type = temp[2] == ',' ? ADJ : OTHER;
break;
case 'V':
t.type = VERB;
break;
case 'A':
t.type = temp[2] == '=' ? ADJ : OTHER;
break;
default:
t.type = OTHER;
break;
}
data.push_back(t);
}
else
{
CWord t;
t.str = temp.substr(0, temp.find("?"));
t.type = OTHER;
data.push_back(t);
}
}
from_mystem.close();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment