Created
February 26, 2012 19:34
-
-
Save kishida/1918496 to your computer and use it in GitHub Desktop.
Amazonランキングを1時間ごとに取得するJavaコード
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Fetches the Amazon sales rank once every hour.
 * Requires NekoHTML.
 */
package amazonrank; | |
import java.io.*;
import java.net.URL;
import java.net.URLConnection;
import java.util.Date;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.cyberneko.html.parsers.DOMParser;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
/** | |
* | |
* @author naoki | |
*/ | |
public class AmazonWatcher { | |
public static void main(String[] args) throws IOException, SAXException, XPathExpressionException{ | |
final String[] urls = {//順位を取得するAmazonURL | |
"http://www.amazon.co.jp/dp/4839914826/", | |
"http://www.amazon.co.jp/dp/4777515486/", | |
"http://www.amazon.co.jp/dp/4839926816/", | |
"http://www.amazon.co.jp/dp/4839932530/", | |
}; | |
final String outfile = "amazon.txt";//出力ファイル名 | |
ScheduledExecutorService exec = Executors.newSingleThreadScheduledExecutor(); | |
exec.scheduleAtFixedRate(new Runnable() { | |
@Override | |
public void run() { | |
try{ | |
FileWriter fw = new FileWriter(outfile, true); | |
PrintWriter pw = new PrintWriter(fw); | |
for(String url : urls){ | |
String result = getAmazonRank(url, new Date()); | |
System.out.println(result); | |
pw.println(result); | |
} | |
}catch(Exception e){ | |
System.out.println(e.getMessage()); | |
} | |
} | |
}, 0, 1, TimeUnit.HOURS); | |
} | |
private static String getAmazonRank(String url, Date today) throws IOException, SAXException, XPathExpressionException{ | |
URL u = new URL(url); | |
InputStream is = u.openStream(); | |
try{ | |
//HTMLからDOMを取得 | |
DOMParser parser = new DOMParser(); | |
parser.setFeature("http://xml.org/sax/features/namespaces", false);//ネームスペースを無効に | |
parser.parse(new InputSource(new InputStreamReader(is, "Shift_JIS"))); | |
Document doc = parser.getDocument(); | |
//XPathの準備 | |
XPathFactory xpf = XPathFactory.newInstance(); | |
XPath xp = xpf.newXPath(); | |
//ランキングを取得 | |
String rankSection = xp.evaluate("//LI[@id='SalesRank']", doc); | |
Pattern p = Pattern.compile("([0-9,]+)位"); | |
Matcher m = p.matcher(rankSection); | |
String rank; | |
if(m.find()){ | |
rank = m.group(1); | |
}else{ | |
rank = "error"; | |
} | |
//タイトルを取得 | |
String title = xp.evaluate("//IMG[@id='prodImage']/@alt", doc); | |
//出力文字列を作成 | |
return String.format("%tY/%<th/%<td %<tH:%<tM:%<tS %s %s %s", today, url, rank, title); | |
}finally{ | |
is.close(); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment