kishida · February 26, 2012 19:34
diff --git a/AmazonWatcher.java b/AmazonWatcher.java
 /*
 * Amazonランキングを1時間ごとに取得する。
 * NekoHTMLが必要。
 */
 package amazonrank;

 import java.io.*;
 import java.net.URL;
 import java.util.Date;
 import java.util.concurrent.Executors;
 import java.util.concurrent.ScheduledExecutorService;
 import java.util.concurrent.TimeUnit;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import javax.xml.xpath.XPath;
 import javax.xml.xpath.XPathExpressionException;
 import javax.xml.xpath.XPathFactory;
 import org.cyberneko.html.parsers.DOMParser;
 import org.w3c.dom.Document;
 import org.xml.sax.InputSource;
 import org.xml.sax.SAXException;

 /**
 * Records the Amazon.co.jp sales rank of a fixed set of product pages once per hour.
 * Each reading is printed to standard output and appended to a text file.
 * Requires NekoHTML on the classpath for HTML parsing.
 *
 * @author naoki
 */
 public class AmazonWatcher {
    /** Captures the digits (and commas) that immediately precede "位" in the sales-rank text. */
    private static final Pattern RANK_PATTERN = Pattern.compile("([0-9,]+)位");

    /**
     * Schedules the polling task on a single-threaded scheduler: the first run starts
     * immediately and it repeats every hour. The executor is not shut down here, so
     * polling continues until the process is terminated.
     *
     * @param args unused
     */
    public static void main(String[] args) throws IOException, SAXException, XPathExpressionException{
        final String[] urls = {// Amazon product URLs whose rank is recorded
            "http://www.amazon.co.jp/dp/4839914826/",
            "http://www.amazon.co.jp/dp/4777515486/",
            "http://www.amazon.co.jp/dp/4839926816/",
            "http://www.amazon.co.jp/dp/4839932530/",
        };
        final String outfile = "amazon.txt";// output file name (opened in append mode)
        ScheduledExecutorService exec = Executors.newSingleThreadScheduledExecutor();
        exec.scheduleAtFixedRate(new Runnable() {
            @Override
            public void run() {
                // Nothing may escape run(): an exception thrown from a scheduled task
                // suppresses all of its subsequent executions.
                try{
                    PrintWriter pw = new PrintWriter(new FileWriter(outfile, true));
                    try{
                        // A failure on one URL skips the remaining URLs of this run.
                        for(String url : urls){
                            String result = getAmazonRank(url, new Date());
                            System.out.println(result);
                            pw.println(result);
                        }
                    }finally{
                        // Closing flushes the buffered lines and releases the file handle;
                        // without it every run leaks a handle and output may never be written.
                        pw.close();
                    }
                }catch(Exception e){
                    // Print the exception itself: getMessage() alone is null for many exceptions.
                    System.out.println(e);
                }
            }
        }, 0, 1, TimeUnit.HOURS);
    }

    /**
     * Downloads one product page and builds a single report line from it.
     *
     * @param url   product page URL to fetch
     * @param today timestamp written at the start of the line
     * @return the timestamp, URL, rank and title separated by spaces; the rank is
     *         {@code "error"} when no rank number is found in the page
     * @throws IOException if the page cannot be opened or read
     * @throws SAXException if the HTML parser fails
     * @throws XPathExpressionException if an XPath expression cannot be evaluated
     */
    private static String getAmazonRank(String url, Date today) throws IOException, SAXException, XPathExpressionException{
        URL u = new URL(url);
        InputStream is = u.openStream();
        try{
            // Parse the HTML into a DOM
            DOMParser parser = new DOMParser();
            parser.setFeature("http://xml.org/sax/features/namespaces", false);// disable namespaces
            parser.parse(new InputSource(new InputStreamReader(is, "Shift_JIS")));
            Document doc = parser.getDocument();
            // Prepare XPath
            XPathFactory xpf = XPathFactory.newInstance();
            XPath xp = xpf.newXPath();

            // Extract the rank from the text of the SalesRank list item
            String rankSection = xp.evaluate("//LI[@id='SalesRank']", doc);
            Matcher m = RANK_PATTERN.matcher(rankSection);
            String rank;
            if(m.find()){
                rank = m.group(1);
            }else{
                rank = "error";
            }
            // Extract the title from the product image's alt text
            String title = xp.evaluate("//IMG[@id='prodImage']/@alt", doc);
            // Build the output line.
            // NOTE(review): %th formats the abbreviated month name (same as %tb), not a
            // month number; %tm may have been intended. Left unchanged so the format of
            // already-recorded lines stays consistent.
            return String.format("%tY/%<th/%<td %<tH:%<tM:%<tS %s %s %s", today, url, rank, title);
        }finally{
            is.close();
        }
    }
 }
	/*
	* Amazonランキングを1時間ごとに取得する。
	* NekoHTMLが必要。
	*/
	package amazonrank;

	import java.io.*;
	import java.net.URL;
	import java.util.Date;
	import java.util.concurrent.Executors;
	import java.util.concurrent.ScheduledExecutorService;
	import java.util.concurrent.TimeUnit;
	import java.util.regex.Matcher;
	import java.util.regex.Pattern;
	import javax.xml.xpath.XPath;
	import javax.xml.xpath.XPathExpressionException;
	import javax.xml.xpath.XPathFactory;
	import org.cyberneko.html.parsers.DOMParser;
	import org.w3c.dom.Document;
	import org.xml.sax.InputSource;
	import org.xml.sax.SAXException;

	/**
	 * Records the Amazon.co.jp sales rank of a fixed set of product pages once per hour.
	 * Each reading is printed to standard output and appended to a text file.
	 * Requires NekoHTML on the classpath for HTML parsing.
	 *
	 * @author naoki
	 */
	public class AmazonWatcher {
	    /** Captures the digits (and commas) that immediately precede "位" in the sales-rank text. */
	    private static final Pattern RANK_PATTERN = Pattern.compile("([0-9,]+)位");

	    /**
	     * Schedules the polling task on a single-threaded scheduler: the first run starts
	     * immediately and it repeats every hour. The executor is not shut down here, so
	     * polling continues until the process is terminated.
	     *
	     * @param args unused
	     */
	    public static void main(String[] args) throws IOException, SAXException, XPathExpressionException{
	        final String[] urls = {// Amazon product URLs whose rank is recorded
	            "http://www.amazon.co.jp/dp/4839914826/",
	            "http://www.amazon.co.jp/dp/4777515486/",
	            "http://www.amazon.co.jp/dp/4839926816/",
	            "http://www.amazon.co.jp/dp/4839932530/",
	        };
	        final String outfile = "amazon.txt";// output file name (opened in append mode)
	        ScheduledExecutorService exec = Executors.newSingleThreadScheduledExecutor();
	        exec.scheduleAtFixedRate(new Runnable() {
	            @Override
	            public void run() {
	                // Nothing may escape run(): an exception thrown from a scheduled task
	                // suppresses all of its subsequent executions.
	                try{
	                    PrintWriter pw = new PrintWriter(new FileWriter(outfile, true));
	                    try{
	                        // A failure on one URL skips the remaining URLs of this run.
	                        for(String url : urls){
	                            String result = getAmazonRank(url, new Date());
	                            System.out.println(result);
	                            pw.println(result);
	                        }
	                    }finally{
	                        // Closing flushes the buffered lines and releases the file handle;
	                        // without it every run leaks a handle and output may never be written.
	                        pw.close();
	                    }
	                }catch(Exception e){
	                    // Print the exception itself: getMessage() alone is null for many exceptions.
	                    System.out.println(e);
	                }
	            }
	        }, 0, 1, TimeUnit.HOURS);
	    }

	    /**
	     * Downloads one product page and builds a single report line from it.
	     *
	     * @param url   product page URL to fetch
	     * @param today timestamp written at the start of the line
	     * @return the timestamp, URL, rank and title separated by spaces; the rank is
	     *         {@code "error"} when no rank number is found in the page
	     * @throws IOException if the page cannot be opened or read
	     * @throws SAXException if the HTML parser fails
	     * @throws XPathExpressionException if an XPath expression cannot be evaluated
	     */
	    private static String getAmazonRank(String url, Date today) throws IOException, SAXException, XPathExpressionException{
	        URL u = new URL(url);
	        InputStream is = u.openStream();
	        try{
	            // Parse the HTML into a DOM
	            DOMParser parser = new DOMParser();
	            parser.setFeature("http://xml.org/sax/features/namespaces", false);// disable namespaces
	            parser.parse(new InputSource(new InputStreamReader(is, "Shift_JIS")));
	            Document doc = parser.getDocument();
	            // Prepare XPath
	            XPathFactory xpf = XPathFactory.newInstance();
	            XPath xp = xpf.newXPath();

	            // Extract the rank from the text of the SalesRank list item
	            String rankSection = xp.evaluate("//LI[@id='SalesRank']", doc);
	            Matcher m = RANK_PATTERN.matcher(rankSection);
	            String rank;
	            if(m.find()){
	                rank = m.group(1);
	            }else{
	                rank = "error";
	            }
	            // Extract the title from the product image's alt text
	            String title = xp.evaluate("//IMG[@id='prodImage']/@alt", doc);
	            // Build the output line.
	            // NOTE(review): %th formats the abbreviated month name (same as %tb), not a
	            // month number; %tm may have been intended. Left unchanged so the format of
	            // already-recorded lines stays consistent.
	            return String.format("%tY/%<th/%<td %<tH:%<tM:%<tS %s %s %s", today, url, rank, title);
	        }finally{
	            is.close();
	        }
	    }
	}