Created
June 21, 2014 22:28
-
-
Save czxttkl/87d9ee04d5304aabe747 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package io.metadata; | |
import java.io.IOException; | |
import java.io.InputStream; | |
import java.net.HttpURLConnection; | |
import java.net.URL; | |
import java.util.Scanner; | |
import org.apache.commons.lang.StringEscapeUtils; | |
public class Downloader { | |
public final static String USER_AGENT_VALUE = "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.110 Safari/537.36 CoolNovo/2.0.9.20"; | |
public final static String USER_AGENT = "User-Agent"; | |
public final static int CONNECT_TIMEOUT = 5000; | |
public final static int READ_TIMEOUT = 20000; | |
public final static String GET_METHOD = "GET"; | |
public static String toString(URL mURL) throws IOException { | |
InputStream is = getInputStreamFromUrl(mURL); | |
try (Scanner s = new Scanner(is, "UTF-8")) { | |
// Returns strings with http encoding | |
// return s.useDelimiter("\\A").hasNext() ? s.next() : ""; | |
String htmlString = s.useDelimiter("\\A").hasNext() ? s.next() : ""; | |
return StringEscapeUtils.unescapeHtml(htmlString); | |
} | |
} | |
private static InputStream getInputStreamFromUrl(URL mUrl) throws IOException { | |
HttpURLConnection huc = (HttpURLConnection) mUrl.openConnection(); | |
huc.setConnectTimeout(CONNECT_TIMEOUT); | |
huc.setReadTimeout(READ_TIMEOUT); | |
huc.setRequestMethod(GET_METHOD); | |
huc.setRequestProperty(USER_AGENT, USER_AGENT_VALUE); | |
return huc.getInputStream(); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment