Skip to content

Instantly share code, notes, and snippets.

@jrwarwick
Last active September 7, 2022 16:34
Show Gist options
  • Save jrwarwick/4c5aa48832d7c04840a0e7c330941584 to your computer and use it in GitHub Desktop.
Save jrwarwick/4c5aa48832d7c04840a0e7c330941584 to your computer and use it in GitHub Desktop.
Wi-Fi Captive Portal (Automatable) Escape
#!/usr/bin/env python3
import re
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
##optional
import textwrap
from datetime import datetime
"""captive_portal_escape.py: Schedulable automatic-ish escape from a wi-fi "captive portal".
Useful for situations such as allowing a voice-controlled assistant to remain connected.
Distributed under the MIT License.
"""
__author__ = "Justin Warwick <[email protected]>"
__copyright__ = "Copyright 2022, U.S.A."
__license__ = "MIT License (https://opensource.org/licenses/MIT)"
#TODO: suggested crontab line, include alternating log/rolling
#TODO: optional parameterization for base_URL, just assume google for default case
# go to google or whatever, to elicit a captive portal page. do we want a fallback as well?
# search this http://katana.int.bry.com/demo/abutton.html
# to get this: http://katana.int.bry.com/cgi-bin/pwd.pl
# plus the appropriate HTTP method, and do it.
#TODO: .netrc or some kind of graceful/best-effort .captive_portal_credentials.json read-in if found.
# Start-of-run timestamp, so successive scheduled runs can be told apart in a log.
print(datetime.now())

# Placeholder portal credentials. The only reference to them in this script is
# the commented-out POST at the very end, so they are not transmitted today.
payload = dict(username='JDoe', password='please')

# The "litmus" page: fetching it either returns the real page (not captive) or
# whatever the portal serves in its place.
# NOTE(review): many portals only intercept plain HTTP -- confirm that an HTTPS
# litmus URL actually elicits the portal page on the target network.
base_URL = "https://www.google.com/"

# Form action discovered on the portal page; empty until one is found.
login_URL = ""

# ASSUMPTION to start: we are captive until the litmus check proves otherwise.
captivity = True
# Seconds to wait on the network before giving up. Without a timeout a
# scheduled (cron) run can block forever on a dead or half-open connection;
# with it, requests raises an exception and the run ends.
REQUEST_TIMEOUT_SECONDS = 15

# Button captions that usually mean "let me through" on a portal page.
ACCEPT_LABEL_PATTERN = re.compile(r"accept|agree|confirm|submit", re.IGNORECASE)

# Main flow: fetch the litmus page, decide whether we are captive, and if so
# look for an acceptance button, find its enclosing form and request the
# form's action URL within the same session (so cookies carry over).
with requests.Session() as ht_session:
    g = ht_session.get(base_URL, timeout=REQUEST_TIMEOUT_SECONDS)
    #DEBUG#this is the whole document source# print (g.text)
    doc = BeautifulSoup(g.text, 'html.parser')
    # After any redirects, this is the address of the page we actually parsed.
    # Relative form actions must be resolved against it, not against base_URL.
    page_URL = g.url

    # First, do some kind of basic check to see if we /did/ make it to our
    # litmus uri (i.e., not captive currently).
    for elem in doc.find_all():
        # a little junky debug stuff...
        print(elem.name)
        for interesting_attribute in ("name", "description", "alt", "title", "role"):
            attribute_value = elem.get(interesting_attribute)
            if attribute_value:
                print(f"\t {attribute_value}", end="")
        # And here is the "litmus" for punching through and actually getting
        # base_URL. A missing title attribute is treated as an empty string so
        # an untitled name="q" element cannot crash the scan.
        if elem.get('name') == "q" and "Search" in (elem.get('title') or ""):
            captivity = False
        print("\n")

    if not captivity:
        print("Hmm.. ok, actually, I do not believe we are captive!")
    else:
        #TODO: at least scan for crazy javascript that might be an obstacle.
        for button in doc.find_all('button'):
            # Pattern first, subject second: search the caption for a keyword.
            if not ACCEPT_LABEL_PATTERN.search(button.text):
                continue
            print("\tCandidate button FOUND: " + button.text)
            print(button.parent.name)
            # Walk back up until the ancestor form is found. Stop at <body>,
            # or when the ancestors run out (button outside both form and body).
            form = None
            for ancestor_element in button.parents:
                if ancestor_element.name == "form":
                    form = ancestor_element
                    break
                if ancestor_element.name == "body":
                    break
                print("\t\ttag: " + ancestor_element.name)
            if form is not None:
                print("ok, so hopfully action is...")
                print(form.get('action'))
                # A form without an action submits to the page's own URL; keep
                # login_URL a string so the URL handling below stays valid.
                login_URL = form.get('action') or ""
                #TODO: should validate this string for uri-ishness
                #TODO: discover, record, and honor http method of the form.
            else:
                print("No reasonable matching element found! Maybe portal page has weird html structure?")

        if not login_URL:
            #TODO: also try again with "<input type='submit'/> :
            for button in doc.find_all('input', type="submit"):
                # The value attribute is optional on submit inputs.
                print("\tCandidate old-style input-submit FOUND, 'value': " + (button.get('value') or ""))
                print("\thowever, doing something about that is #TODO :(")

        if login_URL.lower().startswith(("http://", "https://")):
            print("Extracted URI appears to be an absolute URI, just roll with it...")
        else:
            login_URL = urljoin(page_URL, login_URL)
            print("Ok, revised login URI (because it was relative) : " + login_URL)
            #TODO: seek out <base> tag and allow that to override implicit base.
        if login_URL in (base_URL, page_URL):
            print("Actually, probably this is not going to work. The base URL and login URL being the same is usually a sign of soft failure.")

        print("\nHere goes; attempting to induce acceptance/confirmation/whatever and escape captivity...\n")
        r = ht_session.get(login_URL, timeout=REQUEST_TIMEOUT_SECONDS)
        print(" ----\n" + textwrap.indent(r.text, " | ") + " ----\n")
        #p = ht_session.post(login_URL, data=payload)
        #print (p.text)
        #r = ht_session.post("http://katana.int.bry.com/cgi-bin/pwd.pl")
        #print (r.text)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment