jrwarwick · September 7, 2022 16:34
diff --git a/captive_portal_escape.py b/captive_portal_escape.py
 #!/usr/bin/env python3
 import re
 import requests
 from bs4 import BeautifulSoup
 from urllib.parse import urljoin
 ##optional
 import textwrap
 from datetime import datetime

 """captive_portal_escape.py: Schedulable automatic-ish escape from a wi-fi "captive portal". 
 Useful for situations such as allowing a voice-controlled assistant to remain connected.
 Distributed under the MIT License.
 """

 __author__      = "Justin Warwick <[email protected]>"
 __copyright__   = "Copyright 2022, U.S.A."
 __license__     = "MIT License (https://opensource.org/licenses/MIT)"

 #TODO: suggested crontab line, include alternating log/rolling
 #TODO: optional parameterization for base_URL, just assume google for default case

 # go to google or whatever, to elicit a captive portal page. do we want a fallback as well?
 # search this http://katana.int.bry.com/demo/abutton.html
 # to get this: http://katana.int.bry.com/cgi-bin/pwd.pl
 # plus appropriate method and do it.

 #TODO: .netrc or some kind of graceful/best-effort .captive_portal_credentials.json read-in if found.
 # Script state and main flow: request base_URL, decide from the returned HTML
 # whether we actually reached it, and if not, look for a consent-style button,
 # find the form it lives in, and request that form's action URL.

 # Placeholder credentials; only the commented-out POST at the end uses them.
 payload = {
     'username': 'JDoe',
     'password': 'please'
 }
 base_URL = "https://www.google.com/"  # litmus page we try to reach
 login_URL = ""                        # form action taken from the returned page, if any
 captivity = True  # ASSUMPTION to start
 # Bound every HTTP request so an unattended (e.g. scheduled) run cannot block indefinitely.
 request_timeout = 30
 # Button labels that suggest an "accept the terms"-style control.
 consent_pattern = re.compile(r"accept|agree|confirm|submit", flags=re.IGNORECASE)

 print (datetime.now() )
 with requests.Session() as ht_session:
     g = ht_session.get(base_URL, timeout=request_timeout)
     # If the request was redirected, g.url is the address of the page we really
     # received; relative form actions have to be resolved against that page.
     page_URL = g.url
     #DEBUG#this is the whole document source# print (g.text)
     doc = BeautifulSoup(g.text, 'html.parser')

     # First, do some kind of basic check to see if we /did/ make it to our litmus uri (i.e., not captive currently)
     for elem in doc.find_all():
         # a little junky debug stuff...
         print(elem.name)
         for interesting_attribute in "name", "description", "alt", "title", "role":
             attribute_value = elem.get(interesting_attribute)
             if attribute_value:
                 print("\t  " + attribute_value, end="")
         # And here is the "litmus" for punching through and actually getting base_URL.
         # The title attribute may be missing, so fall back to "" instead of testing "in None".
         if elem.get('name') == "q" and "Search" in (elem.get('title') or ""):
             captivity = False
     print("\n")

     if not captivity:
         print("Hmm.. ok, actually, I do not believe we are captive!")
     else:
         #TODO: at least scan for crazy javascript that might be an obstacle.
         for button in doc.find_all('button'):
             # Search for a consent keyword inside the label (pattern first, text second).
             if consent_pattern.search(button.text):
                 print("\tCandidate button FOUND:  " + button.text)
                 print( button.parent.name )
                 # walk back up until you find the ancestor form.
                 # Stop as well when we run out of parents (button outside any form/body).
                 ancestor_element = button.parent
                 while ancestor_element is not None and ancestor_element.name not in ("form", "body"):
                     print("\t\ttag: " + ancestor_element.name)
                     ancestor_element = ancestor_element.parent

                 if ancestor_element is not None and ancestor_element.name == "form":
                     print("ok, so hopfully action is...")
                     print(ancestor_element.get('action'))
                     # A form without an action attribute yields None; keep login_URL a string.
                     login_URL = ancestor_element.get('action') or ""
                     #TODO: should validate this string for uri-ishness
                     #TODO: discover, record, and honor http method of the form.

                 else:
                     print("No reasonable matching element found! Maybe portal page has weird html structure?")

         if not login_URL:
             #TODO:  also try again with "<input type='submit'/> :
             for button in doc.find_all('input',type="submit"):
                 # The value attribute is optional on a submit input.
                 print("\tCandidate old-style input-submit FOUND, 'value':  " + (button.get('value') or ""))
                 print("\thowever, doing something about that is #TODO :(")

         if login_URL.startswith("http"):
             print("Extracted URI appears to be an absolute URI, just roll with it...")
         else:
             # An empty login_URL resolves to page_URL itself, which the check below reports.
             login_URL = urljoin(page_URL, login_URL)
             print("Ok, revised login URI (because it was relative) : " + login_URL)
         #TODO: seek out <base> tag and allow that to override implicit base.
         if login_URL in (base_URL, page_URL):
             print("Actually, probably this is not going to work. The base URL and login URL being the same is usually a sign of soft failure.")

         print("\nHere goes; attempting to induce acceptance/confirmation/whatever and escape captivity...\n")
         r = ht_session.get(login_URL, timeout=request_timeout)
         print ("  ----\n" + textwrap.indent(r.text,"  |  ") + "  ----\n")

         #p = ht_session.post(login_URL, data=payload)
         #print (p.text)

         #r = ht_session.post("http://katana.int.bry.com/cgi-bin/pwd.pl")
         #print (r.text)
	#!/usr/bin/env python3
	import re
	import requests
	from bs4 import BeautifulSoup
	from urllib.parse import urljoin
	##optional
	import textwrap
	from datetime import datetime

	"""captive_portal_escape.py: Schedulable automatic-ish escape from a wi-fi "captive portal".
	Useful for situations such as allowing a voice-controlled assistant to remain connected.
	Distributed under the MIT License.
	"""

	__author__ = "Justin Warwick <[email protected]>"
	__copyright__ = "Copyright 2022, U.S.A."
	__license__ = "MIT License (https://opensource.org/licenses/MIT)"

	#TODO: suggested crontab line, include alternating log/rolling
	#TODO: optional parameterization for base_URL, just assume google for default case

	# go to google or whatever, to elicit a captive portal page. do we want a fallback as well?
	# search this http://katana.int.bry.com/demo/abutton.html
	# to get this: http://katana.int.bry.com/cgi-bin/pwd.pl
	# plus appropriate method and do it.

	#TODO: .netrc or some kind of graceful/best-effort .captive_portal_credentials.json read-in if found.
	# Script state and main flow: request base_URL, decide from the returned HTML
	# whether we actually reached it, and if not, look for a consent-style button,
	# find the form it lives in, and request that form's action URL.

	# Placeholder credentials; only the commented-out POST at the end uses them.
	payload = {
	    'username': 'JDoe',
	    'password': 'please'
	}
	base_URL = "https://www.google.com/"  # litmus page we try to reach
	login_URL = ""                        # form action taken from the returned page, if any
	captivity = True # ASSUMPTION to start
	# Bound every HTTP request so an unattended (e.g. scheduled) run cannot block indefinitely.
	request_timeout = 30
	# Button labels that suggest an "accept the terms"-style control.
	consent_pattern = re.compile(r"accept|agree|confirm|submit", flags=re.IGNORECASE)

	print (datetime.now() )
	with requests.Session() as ht_session:
	    g = ht_session.get(base_URL, timeout=request_timeout)
	    # If the request was redirected, g.url is the address of the page we really
	    # received; relative form actions have to be resolved against that page.
	    page_URL = g.url
	    #DEBUG#this is the whole document source# print (g.text)
	    doc = BeautifulSoup(g.text, 'html.parser')

	    # First, do some kind of basic check to see if we /did/ make it to our litmus uri (i.e., not captive currently)
	    for elem in doc.find_all():
	        # a little junky debug stuff...
	        print(elem.name)
	        for interesting_attribute in "name", "description", "alt", "title", "role":
	            attribute_value = elem.get(interesting_attribute)
	            if attribute_value:
	                print("\t " + attribute_value, end="")
	        # And here is the "litmus" for punching through and actually getting base_URL.
	        # The title attribute may be missing, so fall back to "" instead of testing "in None".
	        if elem.get('name') == "q" and "Search" in (elem.get('title') or ""):
	            captivity = False
	    print("\n")

	    if not captivity:
	        print("Hmm.. ok, actually, I do not believe we are captive!")
	    else:
	        #TODO: at least scan for crazy javascript that might be an obstacle.
	        for button in doc.find_all('button'):
	            # Search for a consent keyword inside the label (pattern first, text second).
	            if consent_pattern.search(button.text):
	                print("\tCandidate button FOUND: " + button.text)
	                print( button.parent.name )
	                # walk back up until you find the ancestor form.
	                # Stop as well when we run out of parents (button outside any form/body).
	                ancestor_element = button.parent
	                while ancestor_element is not None and ancestor_element.name not in ("form", "body"):
	                    print("\t\ttag: " + ancestor_element.name)
	                    ancestor_element = ancestor_element.parent

	                if ancestor_element is not None and ancestor_element.name == "form":
	                    print("ok, so hopfully action is...")
	                    print(ancestor_element.get('action'))
	                    # A form without an action attribute yields None; keep login_URL a string.
	                    login_URL = ancestor_element.get('action') or ""
	                    #TODO: should validate this string for uri-ishness
	                    #TODO: discover, record, and honor http method of the form.

	                else:
	                    print("No reasonable matching element found! Maybe portal page has weird html structure?")

	        if not login_URL:
	            #TODO: also try again with "<input type='submit'/> :
	            for button in doc.find_all('input',type="submit"):
	                # The value attribute is optional on a submit input.
	                print("\tCandidate old-style input-submit FOUND, 'value': " + (button.get('value') or ""))
	                print("\thowever, doing something about that is #TODO :(")

	        if login_URL.startswith("http"):
	            print("Extracted URI appears to be an absolute URI, just roll with it...")
	        else:
	            # An empty login_URL resolves to page_URL itself, which the check below reports.
	            login_URL = urljoin(page_URL, login_URL)
	            print("Ok, revised login URI (because it was relative) : " + login_URL)
	        #TODO: seek out <base> tag and allow that to override implicit base.
	        if login_URL in (base_URL, page_URL):
	            print("Actually, probably this is not going to work. The base URL and login URL being the same is usually a sign of soft failure.")

	        print("\nHere goes; attempting to induce acceptance/confirmation/whatever and escape captivity...\n")
	        r = ht_session.get(login_URL, timeout=request_timeout)
	        print (" ----\n" + textwrap.indent(r.text," | ") + " ----\n")

	        #p = ht_session.post(login_URL, data=payload)
	        #print (p.text)

	        #r = ht_session.post("http://katana.int.bry.com/cgi-bin/pwd.pl")
	        #print (r.text)