Skip to content

Instantly share code, notes, and snippets.

@Viacheslav77
Last active March 17, 2016 13:34
Show Gist options
  • Select an option

  • Save Viacheslav77/6862e85572d43bc8b715 to your computer and use it in GitHub Desktop.

Select an option

Save Viacheslav77/6862e85572d43bc8b715 to your computer and use it in GitHub Desktop.
Вывести на экран все ссылки, которые содержатся в скачанном HTML документе.
// Вывести на экран все ссылки, которые содержатся в скачанном HTML документе.
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Downloads an HTML page over HTTP and prints every absolute link found in it,
 * one link per line, to standard output.
 */
public class Main {

    /** Address of the page that is downloaded and scanned for links. */
    private static final String PAGE_URL = "http://google.com.ua";

    /**
     * Regular expression for an absolute http/https link. The match stops at the
     * first whitespace, quote or angle bracket so that only the URL itself is
     * captured, without the closing quote, trailing attributes or the tag's
     * {@code >}.
     */
    private static final String LINK_REGEX = "https?://[^\\s\"'<>]+";

    /**
     * Entry point: fetches {@link #PAGE_URL} and prints all links it contains.
     *
     * @param args command-line arguments (ignored)
     * @throws Exception if the page cannot be downloaded
     */
    public static void main(String[] args) throws Exception {
        String html = getHTML(PAGE_URL);
        getTextURL(html, LINK_REGEX);
    }

    /**
     * Prints every case-insensitive match of {@code text} in {@code html},
     * each on its own line.
     *
     * @param html the document text to search
     * @param text the regular expression describing a link
     */
    private static void getTextURL(String html, String text) {
        Pattern p = Pattern.compile(text, Pattern.CASE_INSENSITIVE);
        Matcher m = p.matcher(html);
        while (m.find()) {
            System.out.println(m.group());
        }
    }

    /**
     * Downloads the resource at {@code urlStr} and returns its body as a string.
     *
     * @param urlStr absolute URL of the resource
     * @return the complete response body
     * @throws IOException if the URL is malformed or the transfer fails
     */
    private static String getHTML(String urlStr) throws IOException {
        URL url = new URL(urlStr);
        HttpURLConnection http = (HttpURLConnection) url.openConnection();
        // No charset is passed to InputStreamReader, so the response bytes are
        // decoded with the platform default charset.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(http.getInputStream()))) {
            return readAll(br);
        } finally {
            // Release the connection once the body has been consumed (or reading failed).
            http.disconnect();
        }
    }

    /**
     * Reads all remaining characters from {@code reader}.
     *
     * @param reader the source to drain; it is not closed by this method
     * @return everything read, or an empty string if the reader is already at end of stream
     * @throws IOException if reading fails
     */
    private static String readAll(BufferedReader reader) throws IOException {
        StringBuilder sb = new StringBuilder();
        char[] buf = new char[1000];
        int count;
        // Every chunk that is read is appended; read() returns -1 at end of stream.
        while ((count = reader.read(buf)) != -1) {
            sb.append(buf, 0, count);
        }
        return sb.toString();
    }
}
http://www.google.com.ua/imghp?hl=uk&tab=wi">
http://maps.google.com.ua/maps?hl=uk&tab=wl">
http://www.youtube.com/?gl=UA&tab=w1">
http://news.google.com.ua/nwshp?hl=uk&tab=wn">
http://www.google.com.ua/history/optout?hl=uk" class=gb4>
http://www.google.com.ua/" class=gb4>
http://www.google.com.ua/setprefs?sig=0_yrfjxGvtwGJsHPQ61i9y-S_XSBc%3D&hl=ru&source=homepage" data-ved="0ahUKEwiak5-A97TLAhVoDJoKHXVuBrUQ2ZgBCAU">
http://www.google.com.ua/intl/uk/ads/">
http://www.google.com.ua/intl/uk/services/">
http://www.google.com.ua/setprefdomain?prefdom=US&sig=__pDoFcOfxh55qTCcJwsaeTAu-nfY%3D" id="fehl">
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment