Created
September 15, 2014 06:54
-
-
Save ceekz/98687c266c6ee060f37b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Linq; | |
using System.Text; | |
using System.Runtime.InteropServices; | |
namespace MecabWrapper | |
{ | |
/// <summary>
/// Thin P/Invoke wrapper around the native MeCab library (libmecab64.dll).
/// Owns two native tagger handles: one configured with a custom node format
/// (surface, first feature field, status) and one configured for "wakati"
/// output. Dispose the instance to release the handles deterministically;
/// the finalizer is only a safety net.
/// </summary>
/// <remarks>
/// NOTE(review): input and output strings are marshalled with the ANSI code
/// page. This presumably only round-trips correctly when the MeCab dictionary
/// charset matches the system ANSI code page - confirm against the installed
/// dictionary.
/// </remarks>
public class Mecab : IDisposable
{
    // Absolute location of the native library; shared by every import below.
    private const string LibMecabPath = @"C:\Program Files (x86)\MeCab\bin\libmecab64.dll";

    [DllImport(LibMecabPath, CallingConvention = CallingConvention.Cdecl)]
    private static extern IntPtr mecab_new2(string arg);

    // Declared as IntPtr (not string) so the runtime marshaller does not try
    // to free the returned native buffer; it is copied manually instead.
    [DllImport(LibMecabPath, CallingConvention = CallingConvention.Cdecl)]
    private static extern IntPtr mecab_sparse_tostr(IntPtr m, string str);

    [DllImport(LibMecabPath, CallingConvention = CallingConvention.Cdecl)]
    private static extern void mecab_destroy(IntPtr m);

    // Native tagger handles; IntPtr.Zero means "not created" or "already released".
    private IntPtr mecabPos;
    private IntPtr mecabWakati;

    /// <summary>
    /// Creates both native taggers. A handle may be IntPtr.Zero if the native
    /// constructor fails; the Parse methods detect that and report it through
    /// their normal error path.
    /// </summary>
    public Mecab()
    {
        // One line per node: surface <TAB> first feature field <TAB> status.
        // Alternative with seven feature fields:
        //   --node-format=%M\t%f[0],%f[1],%f[2],%f[3],%f[4],%f[5],%f[6]\t%s\n
        mecabPos = mecab_new2(@"--node-format=%M\t%f[0]\t%s\n");
        mecabWakati = mecab_new2(@"-O wakati");
    }

    /// <summary>
    /// Safety net for callers that never call <see cref="Dispose()"/>.
    /// </summary>
    ~Mecab()
    {
        Dispose(false);
    }

    /// <summary>
    /// Releases both native taggers. Safe to call more than once.
    /// </summary>
    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Releases the native taggers.
    /// </summary>
    /// <param name="disposing">
    /// True when called from <see cref="Dispose()"/>, false when called from
    /// the finalizer. Only unmanaged state is held, so both paths do the same work.
    /// </param>
    protected virtual void Dispose(bool disposing)
    {
        ReleaseHandle(ref mecabPos);
        ReleaseHandle(ref mecabWakati);
    }

    // Destroys a tagger if it exists and zeroes the field so a second call
    // (or a finalizer running after a failed constructor) is a no-op.
    private static void ReleaseHandle(ref IntPtr handle)
    {
        if (handle != IntPtr.Zero)
        {
            mecab_destroy(handle);
            handle = IntPtr.Zero;
        }
    }

    // Runs mecab_sparse_tostr on the given tagger and copies the result into a
    // managed string. Throws instead of handing NULL to native code; both
    // public Parse overloads catch and log these exceptions.
    private string Analyze(IntPtr tagger, string str)
    {
        if (str == null)
        {
            throw new ArgumentNullException("str");
        }
        if (tagger == IntPtr.Zero)
        {
            throw new InvalidOperationException(
                "MeCab tagger is not available (creation failed or the instance was disposed).");
        }

        try
        {
            IntPtr output = mecab_sparse_tostr(tagger, str);
            if (output == IntPtr.Zero)
            {
                throw new InvalidOperationException("mecab_sparse_tostr returned NULL.");
            }
            return Marshal.PtrToStringAnsi(output);
        }
        finally
        {
            // Keep this instance reachable until the native result has been
            // copied; otherwise the finalizer could destroy the tagger while
            // the native call is still in progress.
            GC.KeepAlive(this);
        }
    }

    /// <summary>
    /// Analyzes <paramref name="str"/> with the node-format tagger and splits
    /// each output line into its three tab-separated fields.
    /// </summary>
    /// <param name="str">Text to analyze.</param>
    /// <param name="pos">Receives the second field of every line (the %f[0] value).</param>
    /// <param name="oov">Receives the third field of every line (the %s value) parsed as an integer.</param>
    /// <returns>
    /// The first field of every line (the %M value), in output order, up to
    /// the "EOS" line. On any failure the error is written to standard error
    /// and all three lists are returned empty.
    /// </returns>
    public List<string> Parse(string str, out List<string> pos, out List<int> oov)
    {
        List<string> result = new List<string>();
        pos = new List<string>();
        oov = new List<int>();
        try
        {
            string[] items = Analyze(mecabPos, str).Split('\n');
            foreach (string item in items)
            {
                // "EOS" terminates the sentence; nothing after it is read.
                if (item == "EOS")
                {
                    break;
                }
                string[] line = item.Split('\t');
                if (line.Length < 3)
                {
                    throw new FormatException(
                        string.Format("Unexpected MeCab output line: [{0}]", item));
                }
                // Parse before adding so the three lists never get out of step
                // within a single line.
                int status = int.Parse(line[2], System.Globalization.CultureInfo.InvariantCulture);
                result.Add(line[0]);
                pos.Add(line[1]);
                oov.Add(status);
            }
        }
        catch (Exception e)
        {
            Console.Error.WriteLine(string.Format("Mecab.Parse[{0}]", str));
            Console.Error.WriteLine(e);
            Console.Error.WriteLine();
            // Discard partial output from all three lists so they stay consistent.
            result.Clear();
            pos.Clear();
            oov.Clear();
        }
        return result;
    }

    /// <summary>
    /// Analyzes <paramref name="str"/> with the "wakati" tagger.
    /// </summary>
    /// <param name="str">Text to analyze.</param>
    /// <returns>
    /// The tagger output with leading and trailing whitespace removed, or
    /// null if the analysis failed (the error is written to standard error).
    /// </returns>
    public string Parse(string str)
    {
        try
        {
            return Analyze(mecabWakati, str).Trim();
        }
        catch (Exception e)
        {
            Console.Error.WriteLine(string.Format("Mecab.Parse[{0}]", str));
            Console.Error.WriteLine(e);
            Console.Error.WriteLine();
        }
        return null;
    }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment