Created
August 2, 2011 15:01
-
-
Save osima/1120361 to your computer and use it in GitHub Desktop.
tweet splitter for Japanese
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // | |
| // http://www.my-notebook.net/3556cc11-42d5-471e-ba0b-c6946269a118.html | |
| // | |
| package test; | |
| import java.io.BufferedReader; | |
| import java.io.InputStreamReader; | |
| import java.net.URL; | |
| import java.net.URLEncoder; | |
| import java.util.ArrayList; | |
| import org.json.JSONArray; | |
| import org.json.JSONObject; | |
| import org.json.XML; | |
| public class SplitterTest { | |
| private String appid; | |
| public SplitterTest(String appid){ | |
| this.appid=appid; | |
| } | |
| public ArrayList<String> split( String text ) { | |
| ArrayList<String> list=new ArrayList<String>(); | |
| StringBuffer sb=new StringBuffer(); | |
| try{ | |
| // | |
| // 1) Yahoo日本語形態素解析に問い合わせ | |
| // | |
| //String filter="1|2|4|5|9|10"; | |
| String filter="9|10";//名詞と動詞 | |
| //String filter="9";//名詞のみ | |
| String url = "http://jlp.yahooapis.jp/MAService/V1/parse?" + | |
| "appid=" + this.appid + | |
| "&sentence=" + URLEncoder.encode(text, "UTF-8") + | |
| "&response=surface" + | |
| "&filter=" + filter + | |
| "&results=ma"; | |
| URL myurl = new URL(url); | |
| BufferedReader br = new BufferedReader( new InputStreamReader( myurl.openStream() ,"UTF-8") ); | |
| while(true){ | |
| String line = br.readLine(); | |
| if(line==null) | |
| break; | |
| sb.append(line); | |
| } | |
| br.close(); | |
| // | |
| // 2) 結果のXMLをJSONObjectに変換してから、単語を取得して配列にして返す | |
| // | |
| JSONObject obj = XML.toJSONObject(sb.toString()); | |
| //System.out.println(obj); | |
| JSONObject oResultSet = obj.getJSONObject("ResultSet"); | |
| JSONObject oMa = oResultSet.getJSONObject("ma_result"); | |
| JSONObject oWordList = oMa.getJSONObject("word_list"); | |
| JSONArray wordArray = oWordList.getJSONArray("word"); | |
| //System.out.println(wordArray); | |
| for(int i=0; i<wordArray.length(); i++){ | |
| JSONObject o = (JSONObject)wordArray.get(i); | |
| //String word = (String)o.get("surface"); | |
| //list.add( word ); | |
| Object obj = o.get("surface"); | |
| if( obj!=null ){ | |
| String word = obj.toString(); | |
| list.add( word ); | |
| } | |
| } | |
| } | |
| catch(Exception ex){ | |
| ex.printStackTrace(); | |
| } | |
| return list; | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment