Last active
January 12, 2021 09:50
-
-
Save chrisvoo/f4d8b57ea0b90fcfa01a to your computer and use it in GitHub Desktop.
GET/POST HTTP request and HTML parsing with Jsoup library
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.IOException; | |
import java.security.SecureRandom; | |
import java.security.cert.X509Certificate; | |
import java.util.ArrayList; | |
import java.util.Arrays; | |
import java.util.Collections; | |
import java.util.HashSet; | |
import java.util.List; | |
import java.util.Set; | |
import javax.net.ssl.HttpsURLConnection; | |
import javax.net.ssl.SSLContext; | |
import javax.net.ssl.TrustManager; | |
import javax.net.ssl.X509TrustManager; | |
import org.jsoup.Connection; | |
import org.jsoup.Jsoup; | |
import org.jsoup.nodes.Document; | |
import org.jsoup.select.Elements; | |
public class SimpleTests { | |
public static void main(String[] args) { | |
try { | |
// for avoiding javax.net.ssl.SSLProtocolException: handshake alert: unrecognized_name | |
System.setProperty("jsse.enableSNIExtension", "false"); | |
// WARNING: do it only if security isn't important, otherwise you have | |
// to follow this advices: http://stackoverflow.com/a/7745706/1363265 | |
// Create a trust manager that does not validate certificate chains | |
TrustManager[] trustAllCerts = new TrustManager[]{new X509TrustManager(){ | |
public X509Certificate[] getAcceptedIssuers(){return null;} | |
public void checkClientTrusted(X509Certificate[] certs, String authType){} | |
public void checkServerTrusted(X509Certificate[] certs, String authType){} | |
}}; | |
// Install the all-trusting trust manager | |
try { | |
SSLContext sc = SSLContext.getInstance("TLS"); | |
sc.init(null, trustAllCerts, new SecureRandom()); | |
HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory()); | |
} catch (Exception e) { | |
; | |
} | |
// Authentication example | |
// 1. GET request for login form, hidden fields and cookies | |
Connection.Response loginForm = Jsoup.connect(MY_COMPANY_LOGIN_URL) | |
.method(Connection.Method.GET) | |
.execute(); | |
Document doc = loginForm.parse(); | |
// eventually get hidden fields generated dinamically by the server | |
Elements urlFwdEl = doc.getElementsByAttributeValue("name", "urlFwd"); | |
String forwardingUrl = urlFwdEl.get(0).val(); | |
Elements tokenEl = doc.getElementsByAttributeValue("name", "tokenLogin"); | |
String token = tokenEl.get(0).val(); | |
// 2. POST authentication | |
Connection.Response auth = Jsoup.connect(MY_COMPANY_AUTH_URL) | |
.timeout(10*1000) | |
.userAgent("Firefox ...") | |
.referrer(MY_COMPANY_LOGIN_URL) | |
.data("username", USERNAME) | |
.data("password", PASSWORD) | |
.data("urlFwd", forwardingUrl) | |
.data("tokenLogin", token) | |
.cookies(loginForm.cookies()) // important! | |
.method(Connection.Method.POST) | |
.execute(); | |
Document document = auth.parse(); | |
String htmlAuth = document.toString(); | |
Document docPage = Jsoup.parse(htmlAuth); | |
// repeat this methods combination | |
} catch (IOException e) { | |
// TODO Auto-generated catch block | |
e.printStackTrace(); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment