Created
February 24, 2012 12:20
-
-
Save youtalk/1900624 to your computer and use it in GitHub Desktop.
Web search using Yahoo Web Search API
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package jp.youtalk; | |
import java.io.BufferedReader; | |
import java.io.IOException; | |
import java.io.InputStream; | |
import java.io.InputStreamReader; | |
import java.io.StringReader; | |
import java.io.UnsupportedEncodingException; | |
import java.net.HttpURLConnection; | |
import java.net.URL; | |
import java.net.URLEncoder; | |
import java.text.ParseException; | |
import java.util.LinkedList; | |
import java.util.List; | |
import javax.xml.parsers.DocumentBuilderFactory; | |
import javax.xml.parsers.ParserConfigurationException; | |
import javax.xml.xpath.XPath; | |
import javax.xml.xpath.XPathConstants; | |
import javax.xml.xpath.XPathExpressionException; | |
import javax.xml.xpath.XPathFactory; | |
import org.w3c.dom.Document; | |
import org.w3c.dom.NodeList; | |
import org.xml.sax.InputSource; | |
import org.xml.sax.SAXException; | |
/**
 * Small client for the Yahoo! JAPAN Web Search API (V2) that returns the
 * leading sentence of each search hit's summary.
 *
 * <p>An instance holds one shared {@link XPath} object, so the public search
 * method is {@code synchronized}; calls on the same instance are serialized.
 */
public class YahooWebSearchAPI {
    /** Total number of times one search is attempted before giving up. */
    private static final int MAX_ATTEMPTS = 3;
    /** Upper bound for establishing the HTTP connection, in milliseconds. */
    private static final int CONNECT_TIMEOUT_MILLIS = 10000;
    /** Upper bound for waiting on response data, in milliseconds. */
    private static final int READ_TIMEOUT_MILLIS = 30000;

    private final String appid;
    private final int size;
    private final String url = "http://search.yahooapis.jp/WebSearchService/V2/webSearch";
    private final String charset = "UTF-8";
    private final XPath xpath = XPathFactory.newInstance().newXPath();

    /**
     * @param appid application id sent as the {@code appid} request parameter
     * @param size  number of results requested per search (the {@code results}
     *              request parameter)
     */
    public YahooWebSearchAPI(final String appid, final int size) {
        this.appid = appid;
        this.size = size;
    }

    /**
     * Searches for {@code query + "+Wikipedia"} and returns one cleaned-up
     * summary per hit: parenthesized spans removed, cut after the first
     * "。", trimmed, and with ASCII spaces removed.
     *
     * <p>A failed attempt (network error, unparsable response, ...) is logged
     * to standard error and retried, at most {@value #MAX_ATTEMPTS} attempts
     * in total. The previous implementation retried through unbounded
     * recursion, which ended in a {@link StackOverflowError} whenever the
     * failure was persistent.
     *
     * @param query search terms
     * @return the summaries in result order; an empty list when every attempt
     *         failed or when there were no hits. Never {@code null}.
     */
    public synchronized List<String> searchWikipediaSummary(final String query) {
        for (int attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
            try {
                return fetchSummaries(query);
            } catch (Exception e) {
                // Boundary catch: the caller only ever sees a list, so report and retry.
                e.printStackTrace();
            }
        }
        return new LinkedList<String>();
    }

    public static void main(final String[] args) {
        YahooWebSearchAPI api = new YahooWebSearchAPI("appid here", 5);
        List<String> results = api.searchWikipediaSummary("奈良");
        for (String r : results) {
            System.out.println(r);
        }
    }

    /** Performs a single search round trip and extracts the cleaned summaries. */
    private List<String> fetchSummaries(final String query)
            throws IOException, SAXException, ParserConfigurationException,
                   XPathExpressionException {
        // NOTE(review): the literal "+" is percent-encoded by makeQuery, so the
        // service receives a plus sign, not a space -- confirm this is intended.
        String xml = search(new URL(makeQuery(query + "+Wikipedia")));
        Document doc = xmlToDocument(xml);
        NodeList hits = (NodeList) xpath.evaluate("ResultSet/Result", doc, XPathConstants.NODESET);
        List<String> results = new LinkedList<String>();
        for (int i = 0, n = hits.getLength(); i < n; i++) {
            // Evaluated relative to the hit; yields "" when the hit has no Summary child.
            results.add(cleanSummary(xpath.evaluate("Summary", hits.item(i))));
        }
        return results;
    }

    /**
     * Normalizes one summary string.
     *
     * <p>Steps: remove every {@code (...)} span (ASCII parentheses; used for
     * ruby/reading annotations), keep only the text up to and including the
     * first "。", trim, and delete ASCII spaces. An opening parenthesis with
     * no closing parenthesis after it is left untouched. Nested parentheses
     * are not balanced: a span ends at the first ")" after its "(".
     *
     * @param summary raw summary text; {@code null} is treated as empty
     * @return the normalized text, never {@code null}
     */
    private static String cleanSummary(final String summary) {
        if (summary == null) {
            return "";
        }
        StringBuilder sb = new StringBuilder(summary);
        int open = sb.indexOf("(");
        while (open != -1) {
            int close = sb.indexOf(")", open + 1);
            if (close == -1) {
                break; // unmatched "(": nothing more can be removed
            }
            sb.delete(open, close + 1);
            open = sb.indexOf("(", open);
        }
        String s = sb.toString();
        int stop = s.indexOf("。");
        if (stop != -1) {
            s = s.substring(0, stop + 1); // keep the leading sentence only
        }
        return s.trim().replace(" ", "");
    }

    /**
     * Builds the request URL. Both the application id and the search sentence
     * are URL-encoded so reserved characters cannot corrupt the query string.
     */
    private String makeQuery(final String sentence)
            throws UnsupportedEncodingException {
        return url + "?appid=" + URLEncoder.encode(appid, charset)
                + "&query=" + URLEncoder.encode(sentence, charset)
                + "&results=" + size;
    }

    /**
     * Parses the response body into a DOM tree. The body arrives over plain
     * HTTP, so external entities and external DTD loading are switched off to
     * prevent XXE. The factory stays non-namespace-aware, as before.
     */
    private Document xmlToDocument(final String xml)
            throws IOException, SAXException, ParserConfigurationException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
        factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
        factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        factory.setXIncludeAware(false);
        factory.setExpandEntityReferences(false);
        return factory.newDocumentBuilder().parse(new InputSource(new StringReader(xml)));
    }

    /**
     * Issues a GET request and returns the response body, decoded as UTF-8,
     * with every line terminated by {@code "\n"}.
     *
     * <p>The reader and the connection are released even when reading fails.
     * Timeouts are set so that a stalled server cannot block the caller (and
     * the lock it holds) forever.
     *
     * @throws IOException on connection failure, timeout, or an HTTP error status
     */
    private String search(final URL url) throws IOException {
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        try {
            connection.setRequestMethod("GET");
            connection.setConnectTimeout(CONNECT_TIMEOUT_MILLIS);
            connection.setReadTimeout(READ_TIMEOUT_MILLIS);
            connection.connect();
            InputStream is = connection.getInputStream();
            BufferedReader br = new BufferedReader(new InputStreamReader(is, charset));
            try {
                StringBuilder sb = new StringBuilder();
                String line;
                while ((line = br.readLine()) != null) {
                    sb.append(line).append("\n");
                }
                return sb.toString();
            } finally {
                br.close();
            }
        } finally {
            connection.disconnect();
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment