Created
November 27, 2019 08:34
-
-
Save trykopa/52d9625774f1a7846ebc91b42b6e893b to your computer and use it in GitHub Desktop.
HomeWork 12 Task 3
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package HomeWork12; | |
/*
 * 3. Write a program that outputs to a file all the links
 * contained in the HTML document that is returned
 * in response to a request to an arbitrary URL.
 */
import java.io.BufferedReader; | |
import java.io.IOException; | |
import java.io.InputStreamReader; | |
import java.net.MalformedURLException; | |
import java.net.URL; | |
import java.util.ArrayList; | |
import java.util.List; | |
import java.util.regex.Matcher; | |
import java.util.regex.Pattern; | |
import java.util.stream.Collectors; | |
/**
 * Downloads an HTML document and extracts the values of its double-quoted
 * {@code href="..."} attributes.
 */
public class GetLinks {

    /**
     * Matches a double-quoted {@code href} attribute; capture group 1 is the
     * attribute value without the surrounding quotes. Compiled once because
     * it is applied to every line of the document.
     */
    private static final Pattern HREF_PATTERN = Pattern.compile("href=\"(.*?)\"");

    /**
     * Fetches {@code http://www.google.com} and prints to standard output every
     * extracted link that contains the text {@code "http"}, one per line.
     *
     * @param args ignored
     * @throws MalformedURLException if the hard-coded URL cannot be parsed
     */
    public static void main(String[] args) throws MalformedURLException {
        URL testUrl = new URL("http://www.google.com");
        List<String> result = grabLinks(testUrl).stream()
                .filter(link -> link.contains("http"))
                .collect(Collectors.toList());
        result.forEach(System.out::println);
    }

    /**
     * Reads the document at {@code url} line by line and collects the value of
     * every {@code href="..."} attribute, in order of appearance.
     *
     * <p>This is best-effort: if the document cannot be opened or reading fails
     * part-way, the stack trace is printed and the links collected so far
     * (possibly none) are returned.
     *
     * @param url location of the HTML document to scan
     * @return a mutable list of the attribute values, without surrounding quotes;
     *         never {@code null}
     */
    public static List<String> grabLinks(URL url) {
        List<String> links = new ArrayList<>();
        // try-with-resources closes the reader exactly once, and does nothing
        // when openStream() itself fails (the old finally block threw a
        // NullPointerException in that case).
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                url.openStream(), java.nio.charset.StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                Matcher matcher = HREF_PATTERN.matcher(line);
                while (matcher.find()) {
                    // Group 1 is the bare attribute value; stripping "href=" from
                    // the whole match kept the quotes and also removed any
                    // "href=" text occurring inside the URL itself.
                    links.add(matcher.group(1));
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return links;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment