Last active
March 17, 2016 13:34
-
-
Save Viacheslav77/6862e85572d43bc8b715 to your computer and use it in GitHub Desktop.
Вывести на экран все ссылки, которые содержатся в скачанном HTML документе.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // Вывести на экран все ссылки, которые содержатся в скачанном HTML документе. | |
| import java.io.BufferedReader; | |
| import java.io.IOException; | |
| import java.io.InputStreamReader; | |
| import java.net.HttpURLConnection; | |
| import java.net.URL; | |
| import java.util.ArrayList; | |
| import java.util.List; | |
| import java.util.regex.Matcher; | |
| import java.util.regex.Pattern; | |
/**
 * Downloads an HTML page over HTTP and prints every substring of it that
 * matches a link pattern, one match per line.
 */
public class Main {

    /**
     * Fetches {@code http://google.com.ua} and prints every match of the link
     * pattern found in the response body.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the page cannot be downloaded
     */
    public static void main(String[] args) throws IOException {
        String path = "http://google.com.ua";
        String html = getHTML(path);
        // Lazy match from "http://" up to and including the first '>' that
        // follows, so each printed match also carries the closing quote and
        // any remaining attributes of the tag.
        String text = "http://.*?\\>";
        getTextURL(html, text);
    }

    /**
     * Prints every case-insensitive match of {@code text} in {@code html} to
     * standard output, one per line, in order of appearance.
     *
     * @param html the document to search
     * @param text the regular expression describing a link
     */
    private static void getTextURL(String html, String text) {
        for (String link : findMatches(html, text)) {
            System.out.println(link);
        }
    }

    /**
     * Collects every case-insensitive match of {@code regex} in {@code input}.
     * Package-private so it can be exercised without network access.
     *
     * @param input the text to search
     * @param regex the regular expression to look for
     * @return the matched substrings in order of appearance; empty if none
     */
    static List<String> findMatches(String input, String regex) {
        Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
        Matcher matcher = pattern.matcher(input);
        List<String> matches = new ArrayList<>();
        while (matcher.find()) {
            matches.add(matcher.group());
        }
        return matches;
    }

    /**
     * Downloads the body of the resource at {@code urlStr} as text.
     *
     * @param urlStr the URL to fetch
     * @return the complete response body
     * @throws IOException if the URL is malformed or the transfer fails
     */
    private static String getHTML(String urlStr) throws IOException {
        URL url = new URL(urlStr);
        HttpURLConnection http = (HttpURLConnection) url.openConnection();
        // NOTE(review): the stream is decoded with the platform default charset;
        // consider honoring the charset announced in the response Content-Type.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(http.getInputStream()))) {
            return readAll(br);
        } finally {
            http.disconnect();
        }
    }

    /**
     * Reads {@code reader} to exhaustion and returns everything it produced.
     * Every chunk is appended, including the first one: the earlier version
     * performed an extra read before its loop and threw that data away.
     * Package-private so it can be exercised without network access.
     *
     * @param reader the source to drain; not closed by this method
     * @return all characters read, or an empty string if the reader was at EOF
     * @throws IOException if reading fails
     */
    static String readAll(BufferedReader reader) throws IOException {
        StringBuilder sb = new StringBuilder();
        char[] buf = new char[1000];
        int count;
        while ((count = reader.read(buf)) != -1) {
            sb.append(buf, 0, count);
        }
        return sb.toString();
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| http://www.google.com.ua/imghp?hl=uk&tab=wi"> | |
| http://maps.google.com.ua/maps?hl=uk&tab=wl"> | |
| http://www.youtube.com/?gl=UA&tab=w1"> | |
| http://news.google.com.ua/nwshp?hl=uk&tab=wn"> | |
| http://www.google.com.ua/history/optout?hl=uk" class=gb4> | |
| http://www.google.com.ua/" class=gb4> | |
| http://www.google.com.ua/setprefs?sig=0_yrfjxGvtwGJsHPQ61i9y-S_XSBc%3D&hl=ru&source=homepage" data-ved="0ahUKEwiak5-A97TLAhVoDJoKHXVuBrUQ2ZgBCAU"> | |
| http://www.google.com.ua/intl/uk/ads/"> | |
| http://www.google.com.ua/intl/uk/services/"> | |
| http://www.google.com.ua/setprefdomain?prefdom=US&sig=__pDoFcOfxh55qTCcJwsaeTAu-nfY%3D" id="fehl"> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment