six2dez · January 12, 2021 11:36
diff --git a/github-endpoints.py b/github-endpoints.py
 #!/usr/bin/python3

 # I don't believe in license.
 # You can do whatever you want with this program.
 # Author : Gwendal Le Coguic
 # Original file: https://github.com/gwen001/github-search/blob/master/github-endpoints.py

 import os
 import sys
 import re
 import time
 import requests
 import random
 import argparse
 from urllib.parse import urlparse
 from functools import partial
 from colored import fg, bg, attr
 from multiprocessing.dummy import Pool

 TOKENS_FILE = os.path.dirname(os.path.realpath(__file__))+'/.github_tokens'
 MIN_LENGTH = 5
 _url_chars = '[a-zA-Z0-9\-\.\?\#\$&@%=_:/\]\[]'
 _not_url_chars = '[^a-zA-Z0-9\-\.\?\#\$&@%=_:/\]\[]'
 t_endpoints = []
 t_exclude = [
    r'^http://$',
    r'^https://$',
    r'^javascript:$',
    r'^tel:$',
    r'^mailto:$',
    r'^application/json$',
    r'^text/plain$',
    r'^text/x-python$',
    r'^text/css$',
    r'^image/png$',
    r'^image/jpeg$',
    r'^image/x-icon$',
    r'^img/favicon.ico$',
    r'^application/x-www-form-urlencoded$',
    r'/Users/[0-9a-zA-Z\-\_]/Desktop',
    r'www.w3.org',
    r'schemas.android.com',
    r'www.apple.com',
    r'\.jpeg',
    r'\.jpg',
    r'\.gif',
    r'\.tif',
    r'\.tiff',
    r'\.ttf',
    r'\.png',
    r'\.css',
    r'\.ico',
    r'\.woff2',
    r'\.woff',
    r'\.swf',
    # r'^#',
    # r'^\?',
    # r'^javascript:',
    # r'^mailto:',
 ]
 t_regexp = [
    r'[\'"\(].*(http[s]?://'+_url_chars+'*?)[\'"\)]',
    r'[\'"\(](http[s]?://'+_url_chars+')',

    r'[\'"\(]('+_url_chars+'+\.sdirect'+_url_chars+'*)',
    r'[\'"\(]('+_url_chars+'+\.htm'+_url_chars+'*)',
    r'[\'"\(]('+_url_chars+'+\.php'+_url_chars+'*)',
    r'[\'"\(]('+_url_chars+'+\.asp'+_url_chars+'*)',
    r'[\'"\(]('+_url_chars+'+\.js'+_url_chars+'*)',
    r'[\'"\(]('+_url_chars+'+\.xml'+_url_chars+'*)',
    r'[\'"\(]('+_url_chars+'+\.ini'+_url_chars+'*)',
    r'[\'"\(]('+_url_chars+'+\.conf'+_url_chars+'*)',
    r'[\'"\(]('+_url_chars+'+\.cfm'+_url_chars+'*)',

    r'href\s*[.=]\s*[\'"]('+_url_chars+'+)',
    r'src\s*[.=]\s*[\'"]('+_url_chars+'+\.js*)',
    r'url\s*[:=]\s*[\'"]('+_url_chars+'+\.js*)',

    r'urlRoot\s*[:=]\s*[\'"]('+_url_chars+'+\.js*)',
    r'endpoint[s]\s*[:=]\s*[\'"]('+_url_chars+'+\.js*)',
    r'script[s]\s*[:=]\s*[\'"]('+_url_chars+'+\.js*)',

    r'\.ajax\s*\(\s*[\'"]('+_url_chars+'+\.js*)',
    r'\.get\s*\(\s*[\'"]('+_url_chars+'+\.js*)',
    r'\.post\s*\(\s*[\'"]('+_url_chars+'+\.js*)',
    r'\.load\s*\(\s*[\'"]('+_url_chars+'+\.js*)',

    ### a bit noisy
    # r'[\'"](' + _url_chars + '+/' + _url_chars + '+)?[\'"]',
    # r'content\s*[.=]\s*[\'"]('+_url_chars+'+)',
 ]

 def githubApiSearchCode( token, search, page, sort, order ):
    headers = { "Authorization":"token "+token }
    url = 'https://api.github.com/search/code?per_page=100&s=' + sort + '&type=Code&o=' + order + '&q=' + search + '&page=' + str(page)
    # print(">>> "+url)

    try:
        r = requests.get( url, headers=headers, timeout=5 )
        json = r.json()
        # print(r.json)
        # print(r.text)
        return json
    except Exception as e:
        print( "%s[-] error occurred: %s%s" % (fg('red'),e,attr(0)) )
        return False



 def getRawUrl( result ):
    raw_url = result['html_url'];
    raw_url = raw_url.replace( 'https://github.com/', 'https://raw.githubusercontent.com/' )
    raw_url = raw_url.replace( '/blob/', '/' )
    return raw_url;


 def readCode( regexp, source, confirm, relative, alldomains, result ):

    time.sleep( random.random() )

    url = getRawUrl( result )
    if url in t_history_urls:
        return

    str = ''
    t_local_endpoints = []
    t_history_urls.append( url )
    code = doGetCode( url )
    # print( code )

    if code:
        if source:
            str = "\n%s>>> %s%s\n\n" % (fg('yellow'),result['html_url'],attr(0))
        matches = re.findall( regexp, code, re.IGNORECASE )
        if matches:
            for r in t_regexp:
                edpt = re.findall( r, code, re.IGNORECASE )
                if edpt:
                    for endpoint in edpt:
                        endpoint = endpoint.strip()
                        if len(endpoint) >= MIN_LENGTH:
                            goodbye = False
                            for exclude in t_exclude:
                                if re.match(exclude,endpoint):
                                    goodbye = True
                                    break
                            if goodbye:
                                continue
                            if endpoint.startswith('http'):
                                is_relative = False
                            else:
                                is_relative = True
                            if not relative and is_relative:
                                continue
                            if endpoint in t_local_endpoints:
                                continue
                            if not source and endpoint in t_endpoints:
                                continue
                            if not alldomains and not is_relative:
                                try:
                                    t_url_parse = urlparse( endpoint )
                                    t_host_parse = tldextract.extract( t_url_parse.netloc )
                                    domain = t_host_parse.domain
                                    sss = re.findall( regexp, t_url_parse.netloc )
                                    if not sss:
                                        continue
                                except Exception as e:
                                    sys.stdout.write( "%s[-] error occurred: %s%s\n" % (fg('red'),e,attr(0)) )

                            t_endpoints.append( endpoint )
                            t_local_endpoints.append( endpoint )
                            if source:
                                str = str + ("%s\n" % endpoint)
                            else:
                                sys.stdout.write( "%s\n" % endpoint )

    if source and len(t_local_endpoints):
        sys.stdout.write( str )


 def doGetCode( url ):
    try:
        r = requests.get( url, timeout=5 )
    except Exception as e:
        sys.stdout.write( "%s[-] error occurred: %s%s\n" % (fg('red'),e,attr(0)) )
        return False

    return r.text


 parser = argparse.ArgumentParser()
 parser.add_argument( "-t","--token",help="your github token (required)" )
 parser.add_argument( "-d","--domain",help="domain you are looking for (required)" )
 parser.add_argument( "-e","--extend",help="also look for <dummy>example.com", action="store_true" )
 parser.add_argument( "-a","--all",help="displays urls of all other domains", action="store_true" )
 parser.add_argument( "-r","--relative",help="also displays relative urls", action="store_true" )
 parser.add_argument( "-s","--source",help="display urls where endpoints are found", action="store_true" )
 parser.add_argument( "-v","--verbose",help="verbose mode, for debugging purpose", action="store_true" )
 parser.parse_args()
 args = parser.parse_args()

 t_tokens = []
 if args.token:
    t_tokens = args.token.split(',')
 else:
    if os.path.isfile(TOKENS_FILE):
        fp = open(TOKENS_FILE,'r')
        t_tokens = fp.read().split("\n")
        fp.close()

 if not len(t_tokens):
    parser.error( 'auth token is missing' )

 if args.source:
    _source = True
 else:
    _source = False

 if args.domain:
    _domain = args.domain
 else:
    parser.error( 'domain is missing' )

 if args.relative:
    _relative = True
 else:
    _relative = False

 if args.all:
    _alldomains = True
 else:
    _alldomains = False

 t_sort_order = [
    { 'sort':'indexed', 'order':'desc',  },
    { 'sort':'indexed', 'order':'asc',  },
    { 'sort':'', 'order':'desc',  }
 ]

 t_history = []
 t_history_urls = []
 _search = '"' + _domain + '"'

 ### this is a test, looks like we got more result that way
 import tldextract
 t_host_parse = tldextract.extract( _domain )

 if args.extend:
    # which one is
    _search = '"' + t_host_parse.domain + '"'
 else:
    # the most effective ?
    _search = '"' + t_host_parse.domain + '.' + t_host_parse.suffix + '"'

 # or simply ?
 # _search = '"' + _domain + '"'
 # print(_search)
 # exit()
 ###


 if args.extend:
    _regexp = r'(([0-9a-z_\-\.]+\.)?([0-9a-z_\-]+)?'+t_host_parse.domain+'([0-9a-z_\-\.]+)?\.[a-z]{1,5})'
    _confirm = t_host_parse.domain
 else:
    _regexp = r'((([0-9a-z_\-\.]+)\.)?' + _domain.replace('.','\.')+')'
    _confirm = _domain

 if args.verbose:
    print( "Search: %s" % _search )
    print( "Regexp: %s" % _regexp)
    print( "Confirm: %s" % readCode)

 for so in t_sort_order:

    page = 1

    if args.verbose:
        print( '\n----- %s %s\n' % (so['sort'],so['order']) )

    while True:

        if args.verbose:
            print("page %d" % page)

        time.sleep( random.random() )
        token = random.choice( t_tokens )
        t_json = githubApiSearchCode( token, _search, page, so['sort'], so['order'] )
        # print(t_json)

        if not t_json or 'documentation_url' in t_json:
            if args.verbose:
                print(t_json)
            t_tokens.remove(token)
            if len(t_tokens) == 0:
                exit()
            continue

        page = page + 1

        if 'items' in t_json and len(t_json['items']):
            pool = Pool( 30 )
            pool.map( partial(readCode,_regexp,_source,_confirm,_relative,_alldomains), t_json['items'] )
            pool.close()
            pool.join()
        else:
            break
	#!/usr/bin/python3

	# I don't believe in license.
	# You can do whatever you want with this program.
	# Author : Gwendal Le Coguic
	# Original file: https://github.com/gwen001/github-search/blob/master/github-endpoints.py

	import os
	import sys
	import re
	import time
	import requests
	import random
	import argparse
	from urllib.parse import urlparse
	from functools import partial
	from colored import fg, bg, attr
	from multiprocessing.dummy import Pool

	TOKENS_FILE = os.path.dirname(os.path.realpath(__file__))+'/.github_tokens'
	MIN_LENGTH = 5
	_url_chars = '[a-zA-Z0-9\-\.\?\#\$&@%=_:/\]\[]'
	_not_url_chars = '[^a-zA-Z0-9\-\.\?\#\$&@%=_:/\]\[]'
	t_endpoints = []
	t_exclude = [
	r'^http://$',
	r'^https://$',
	r'^javascript:$',
	r'^tel:$',
	r'^mailto:$',
	r'^application/json$',
	r'^text/plain$',
	r'^text/x-python$',
	r'^text/css$',
	r'^image/png$',
	r'^image/jpeg$',
	r'^image/x-icon$',
	r'^img/favicon.ico$',
	r'^application/x-www-form-urlencoded$',
	r'/Users/[0-9a-zA-Z\-\_]/Desktop',
	r'www.w3.org',
	r'schemas.android.com',
	r'www.apple.com',
	r'\.jpeg',
	r'\.jpg',
	r'\.gif',
	r'\.tif',
	r'\.tiff',
	r'\.ttf',
	r'\.png',
	r'\.css',
	r'\.ico',
	r'\.woff2',
	r'\.woff',
	r'\.swf',
	# r'^#',
	# r'^\?',
	# r'^javascript:',
	# r'^mailto:',
	]
	t_regexp = [
	r'[\'"\(].(http[s]?://'+_url_chars+'?)[\'"\)]',
	r'[\'"\(](http[s]?://'+_url_chars+')',

	r'[\'"\(]('+_url_chars+'+\.sdirect'+_url_chars+'*)',
	r'[\'"\(]('+_url_chars+'+\.htm'+_url_chars+'*)',
	r'[\'"\(]('+_url_chars+'+\.php'+_url_chars+'*)',
	r'[\'"\(]('+_url_chars+'+\.asp'+_url_chars+'*)',
	r'[\'"\(]('+_url_chars+'+\.js'+_url_chars+'*)',
	r'[\'"\(]('+_url_chars+'+\.xml'+_url_chars+'*)',
	r'[\'"\(]('+_url_chars+'+\.ini'+_url_chars+'*)',
	r'[\'"\(]('+_url_chars+'+\.conf'+_url_chars+'*)',
	r'[\'"\(]('+_url_chars+'+\.cfm'+_url_chars+'*)',

	r'href\s[.=]\s[\'"]('+_url_chars+'+)',
	r'src\s[.=]\s[\'"]('+_url_chars+'+\.js*)',
	r'url\s[:=]\s[\'"]('+_url_chars+'+\.js*)',

	r'urlRoot\s[:=]\s[\'"]('+_url_chars+'+\.js*)',
	r'endpoint[s]\s[:=]\s[\'"]('+_url_chars+'+\.js*)',
	r'script[s]\s[:=]\s[\'"]('+_url_chars+'+\.js*)',

	r'\.ajax\s\(\s[\'"]('+_url_chars+'+\.js*)',
	r'\.get\s\(\s[\'"]('+_url_chars+'+\.js*)',
	r'\.post\s\(\s[\'"]('+_url_chars+'+\.js*)',
	r'\.load\s\(\s[\'"]('+_url_chars+'+\.js*)',

	### a bit noisy
	# r'[\'"](' + _url_chars + '+/' + _url_chars + '+)?[\'"]',
	# r'content\s[.=]\s[\'"]('+_url_chars+'+)',
	]

	def githubApiSearchCode( token, search, page, sort, order ):
	headers = { "Authorization":"token "+token }
	url = 'https://api.github.com/search/code?per_page=100&s=' + sort + '&type=Code&o=' + order + '&q=' + search + '&page=' + str(page)
	# print(">>> "+url)

	try:
	r = requests.get( url, headers=headers, timeout=5 )
	json = r.json()
	# print(r.json)
	# print(r.text)
	return json
	except Exception as e:
	print( "%s[-] error occurred: %s%s" % (fg('red'),e,attr(0)) )
	return False



	def getRawUrl( result ):
	raw_url = result['html_url'];
	raw_url = raw_url.replace( 'https://github.com/', 'https://raw.githubusercontent.com/' )
	raw_url = raw_url.replace( '/blob/', '/' )
	return raw_url;


	def readCode( regexp, source, confirm, relative, alldomains, result ):

	time.sleep( random.random() )

	url = getRawUrl( result )
	if url in t_history_urls:
	return

	str = ''
	t_local_endpoints = []
	t_history_urls.append( url )
	code = doGetCode( url )
	# print( code )

	if code:
	if source:
	str = "\n%s>>> %s%s\n\n" % (fg('yellow'),result['html_url'],attr(0))
	matches = re.findall( regexp, code, re.IGNORECASE )
	if matches:
	for r in t_regexp:
	edpt = re.findall( r, code, re.IGNORECASE )
	if edpt:
	for endpoint in edpt:
	endpoint = endpoint.strip()
	if len(endpoint) >= MIN_LENGTH:
	goodbye = False
	for exclude in t_exclude:
	if re.match(exclude,endpoint):
	goodbye = True
	break
	if goodbye:
	continue
	if endpoint.startswith('http'):
	is_relative = False
	else:
	is_relative = True
	if not relative and is_relative:
	continue
	if endpoint in t_local_endpoints:
	continue
	if not source and endpoint in t_endpoints:
	continue
	if not alldomains and not is_relative:
	try:
	t_url_parse = urlparse( endpoint )
	t_host_parse = tldextract.extract( t_url_parse.netloc )
	domain = t_host_parse.domain
	sss = re.findall( regexp, t_url_parse.netloc )
	if not sss:
	continue
	except Exception as e:
	sys.stdout.write( "%s[-] error occurred: %s%s\n" % (fg('red'),e,attr(0)) )

	t_endpoints.append( endpoint )
	t_local_endpoints.append( endpoint )
	if source:
	str = str + ("%s\n" % endpoint)
	else:
	sys.stdout.write( "%s\n" % endpoint )

	if source and len(t_local_endpoints):
	sys.stdout.write( str )


	def doGetCode( url ):
	try:
	r = requests.get( url, timeout=5 )
	except Exception as e:
	sys.stdout.write( "%s[-] error occurred: %s%s\n" % (fg('red'),e,attr(0)) )
	return False

	return r.text


	parser = argparse.ArgumentParser()
	parser.add_argument( "-t","--token",help="your github token (required)" )
	parser.add_argument( "-d","--domain",help="domain you are looking for (required)" )
	parser.add_argument( "-e","--extend",help="also look for <dummy>example.com", action="store_true" )
	parser.add_argument( "-a","--all",help="displays urls of all other domains", action="store_true" )
	parser.add_argument( "-r","--relative",help="also displays relative urls", action="store_true" )
	parser.add_argument( "-s","--source",help="display urls where endpoints are found", action="store_true" )
	parser.add_argument( "-v","--verbose",help="verbose mode, for debugging purpose", action="store_true" )
	parser.parse_args()
	args = parser.parse_args()

	t_tokens = []
	if args.token:
	t_tokens = args.token.split(',')
	else:
	if os.path.isfile(TOKENS_FILE):
	fp = open(TOKENS_FILE,'r')
	t_tokens = fp.read().split("\n")
	fp.close()

	if not len(t_tokens):
	parser.error( 'auth token is missing' )

	if args.source:
	_source = True
	else:
	_source = False

	if args.domain:
	_domain = args.domain
	else:
	parser.error( 'domain is missing' )

	if args.relative:
	_relative = True
	else:
	_relative = False

	if args.all:
	_alldomains = True
	else:
	_alldomains = False

	t_sort_order = [
	{ 'sort':'indexed', 'order':'desc', },
	{ 'sort':'indexed', 'order':'asc', },
	{ 'sort':'', 'order':'desc', }
	]

	t_history = []
	t_history_urls = []
	_search = '"' + _domain + '"'

	### this is a test, looks like we got more result that way
	import tldextract
	t_host_parse = tldextract.extract( _domain )

	if args.extend:
	# which one is
	_search = '"' + t_host_parse.domain + '"'
	else:
	# the most effective ?
	_search = '"' + t_host_parse.domain + '.' + t_host_parse.suffix + '"'

	# or simply ?
	# _search = '"' + _domain + '"'
	# print(_search)
	# exit()
	###


	if args.extend:
	_regexp = r'(([0-9a-z_\-\.]+\.)?([0-9a-z_\-]+)?'+t_host_parse.domain+'([0-9a-z_\-\.]+)?\.[a-z]{1,5})'
	_confirm = t_host_parse.domain
	else:
	_regexp = r'((([0-9a-z_\-\.]+)\.)?' + _domain.replace('.','\.')+')'
	_confirm = _domain

	if args.verbose:
	print( "Search: %s" % _search )
	print( "Regexp: %s" % _regexp)
	print( "Confirm: %s" % readCode)

	for so in t_sort_order:

	page = 1

	if args.verbose:
	print( '\n----- %s %s\n' % (so['sort'],so['order']) )

	while True:

	if args.verbose:
	print("page %d" % page)

	time.sleep( random.random() )
	token = random.choice( t_tokens )
	t_json = githubApiSearchCode( token, _search, page, so['sort'], so['order'] )
	# print(t_json)

	if not t_json or 'documentation_url' in t_json:
	if args.verbose:
	print(t_json)
	t_tokens.remove(token)
	if len(t_tokens) == 0:
	exit()
	continue

	page = page + 1

	if 'items' in t_json and len(t_json['items']):
	pool = Pool( 30 )
	pool.map( partial(readCode,_regexp,_source,_confirm,_relative,_alldomains), t_json['items'] )
	pool.close()
	pool.join()
	else:
	break