Skip to content

Instantly share code, notes, and snippets.

@osima
Created August 2, 2011 15:01
Show Gist options
  • Select an option

  • Save osima/1120361 to your computer and use it in GitHub Desktop.

Select an option

Save osima/1120361 to your computer and use it in GitHub Desktop.
tweet splitter for Japanese
//
// http://www.my-notebook.net/3556cc11-42d5-471e-ba0b-c6946269a118.html
//
package test;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import org.json.JSONArray;
import org.json.JSONObject;
import org.json.XML;
/**
 * Splits Japanese text into words by sending it to the Yahoo! Japan
 * morphological analysis web service (MAService V1) and collecting the
 * {@code surface} value of every word in the response.
 *
 * <p>The XML response is converted to JSON with {@link XML#toJSONObject(String)}
 * and then walked as {@code ResultSet -> ma_result -> word_list -> word}.
 */
public class SplitterTest {

    /**
     * Value sent as the {@code filter} request parameter.
     * "9|10" selects nouns and verbs; "9" alone would select nouns only.
     */
    private static final String FILTER = "9|10";

    private String appid;

    /**
     * @param appid application id sent as the {@code appid} request parameter;
     *              it is inserted into the URL as-is
     */
    public SplitterTest(String appid) {
        this.appid = appid;
    }

    /**
     * Returns the words the service reports for {@code text}.
     *
     * <p>This method is best-effort: any failure (network error, unexpected
     * response shape, ...) is printed via {@code printStackTrace()} and the
     * words collected so far are returned, which may be an empty list.
     *
     * @param text the sentence to analyze; {@code null} or empty yields an
     *             empty list without contacting the service
     * @return the word surfaces in response order; never {@code null}
     */
    public ArrayList<String> split(String text) {
        ArrayList<String> list = new ArrayList<String>();
        if (text == null || text.length() == 0) {
            return list;
        }
        try {
            // 1) Query the Yahoo Japanese morphological analysis service.
            String xml = fetch(buildUrl(text));

            // 2) Convert the XML result to a JSONObject and pull out the words.
            JSONObject root = XML.toJSONObject(xml);
            JSONObject resultSet = root.getJSONObject("ResultSet");
            JSONObject maResult = resultSet.getJSONObject("ma_result");

            // An empty <word_list/> element does not convert to a JSONObject,
            // so treat "not an object" as "no words" instead of failing.
            JSONObject wordList = maResult.optJSONObject("word_list");
            if (wordList == null) {
                return list;
            }

            // XML.toJSONObject only builds a JSONArray for repeated elements:
            // a single <word> arrives as a plain JSONObject.
            Object words = wordList.opt("word");
            if (words instanceof JSONArray) {
                JSONArray wordArray = (JSONArray) words;
                for (int i = 0; i < wordArray.length(); i++) {
                    addSurface(list, wordArray.optJSONObject(i));
                }
            } else if (words instanceof JSONObject) {
                addSurface(list, (JSONObject) words);
            }
        } catch (Exception ex) {
            ex.printStackTrace();
        }
        return list;
    }

    /** Builds the request URL for analyzing {@code text}. */
    private String buildUrl(String text) throws Exception {
        return "http://jlp.yahooapis.jp/MAService/V1/parse?" +
                "appid=" + this.appid +
                "&sentence=" + URLEncoder.encode(text, "UTF-8") +
                "&response=surface" +
                // '|' is not a legal raw character in a query string, so encode it.
                "&filter=" + URLEncoder.encode(FILTER, "UTF-8") +
                "&results=ma";
    }

    /**
     * Downloads {@code url} as UTF-8 text. Lines are concatenated without
     * line separators. The reader is closed even when reading fails.
     */
    private static String fetch(String url) throws Exception {
        StringBuilder sb = new StringBuilder();
        BufferedReader br = new BufferedReader(
                new InputStreamReader(new URL(url).openStream(), "UTF-8"));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                sb.append(line);
            }
        } finally {
            br.close();
        }
        return sb.toString();
    }

    /** Appends the {@code surface} of {@code word} to {@code list}, if present. */
    private static void addSurface(ArrayList<String> list, JSONObject word) {
        if (word == null) {
            return;
        }
        // The converted value is not guaranteed to be a String (the XML
        // conversion may yield another type, e.g. for numeric text), so use
        // toString() rather than a cast.
        Object surface = word.opt("surface");
        if (surface != null) {
            list.add(surface.toString());
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment